提交 089fe03d authored 作者: Roy Xue's avatar Roy Xue

Add comments and code style fix for the whole file

上级 92306ee6
...@@ -93,7 +93,7 @@ def _atexit_print_fn(): ...@@ -93,7 +93,7 @@ def _atexit_print_fn():
if not isinstance(ps, ScanProfileStats): if not isinstance(ps, ScanProfileStats):
to_sum.append(ps) to_sum.append(ps)
else: else:
#TODO print the name if there is one! # TODO print the name if there is one!
print 'Skipping empty Profile' print 'Skipping empty Profile'
if len(to_sum) > 1: if len(to_sum) > 1:
# Make a global profile # Make a global profile
...@@ -107,7 +107,7 @@ def _atexit_print_fn(): ...@@ -107,7 +107,7 @@ def _atexit_print_fn():
"validate_time"]: "validate_time"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr)) setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
#merge dictonary # merge dictonary
for attr in ["apply_time", "apply_callcount", for attr in ["apply_time", "apply_callcount",
"apply_cimpl", "variable_shape", "variable_strides"]: "apply_cimpl", "variable_shape", "variable_strides"]:
cum_attr = getattr(cum, attr) cum_attr = getattr(cum, attr)
...@@ -128,10 +128,8 @@ def _atexit_print_fn(): ...@@ -128,10 +128,8 @@ def _atexit_print_fn():
n_apply_to_print=config.profiling.n_apply) n_apply_to_print=config.profiling.n_apply)
class ProfileStats(object): class ProfileStats(object):
""" """
Object to store runtime and memory profiling information for all of Object to store runtime and memory profiling information for all of
Theano's operations: compilation, optimization, execution. Theano's operations: compilation, optimization, execution.
...@@ -572,12 +570,12 @@ class ProfileStats(object): ...@@ -572,12 +570,12 @@ class ProfileStats(object):
for var in a.inputs], for var in a.inputs],
[self.variable_shape[var] [self.variable_shape[var]
for var in a.outputs]) for var in a.outputs])
flops = '%8.1f' % (fl/1024./1024) flops = '%8.1f' % (fl / 1024. / 1024)
flops_s = '%10.1f' % (fl/1024./1024/1024/t) flops_s = '%10.1f' % (fl / 1024. / 1024 / 1024 / t)
else: else:
flops = " " flops = " "
flops_s = " " flops_s = " "
print >> file, format_str %(f, ftot, t, t / nb_call, nb_call, print >> file, format_str % (f, ftot, t, t / nb_call, nb_call,
nd_id, nd_id,
flops, flops_s, flops, flops_s,
str(a)[:maxlen]) str(a)[:maxlen])
...@@ -597,7 +595,7 @@ class ProfileStats(object): ...@@ -597,7 +595,7 @@ class ProfileStats(object):
idx, dtype, sh, st) idx, dtype, sh, st)
# Same as before, this I've sacrificied some information making # Same as before, this I've sacrificied some information making
# the output more readable # the output more readable
#print >> file, ' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %i %s'%( # print >> file, ' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %i %s'%(
# f, ftot, t, tot, t/nb_call,nb_call, str(a)) # f, ftot, t, tot, t/nb_call,nb_call, str(a))
print >> file, ' ... (remaining %i Apply instances account for %.2f%%(%.2fs) of the runtime)'\ print >> file, ' ... (remaining %i Apply instances account for %.2f%%(%.2fs) of the runtime)'\
% (max(0, len(atimes) - N), % (max(0, len(atimes) - N),
...@@ -618,7 +616,7 @@ class ProfileStats(object): ...@@ -618,7 +616,7 @@ class ProfileStats(object):
local_time = sum(self.apply_time.values()) local_time = sum(self.apply_time.values())
if local_time > 0: if local_time > 0:
print >> file, ' Time in thunks: %es (%.3f%%)' % ( print >> file, ' Time in thunks: %es (%.3f%%)' % (
local_time, 100*local_time / self.fct_call_time) local_time, 100 * local_time / self.fct_call_time)
print >> file, ' Total compile time: %es' % self.compile_time print >> file, ' Total compile time: %es' % self.compile_time
print >> file, ' Number of Apply nodes: %s' % len(self.apply_time) print >> file, ' Number of Apply nodes: %s' % len(self.apply_time)
print >> file, ' Theano Optimizer time: %es' % self.optimizer_time print >> file, ' Theano Optimizer time: %es' % self.optimizer_time
...@@ -656,7 +654,7 @@ class ProfileStats(object): ...@@ -656,7 +654,7 @@ class ProfileStats(object):
fct_shapes[node.fgraph][node].append(sh) fct_shapes[node.fgraph][node].append(sh)
node_mem[node] = sum_dense node_mem[node] = sum_dense
#Find the function that used the most of that statistic # Find the function that used the most of that statistic
max_sum_size = 0 max_sum_size = 0
max_node_memory_size = 0 max_node_memory_size = 0
max_running_max_memory_size = 0 max_running_max_memory_size = 0
...@@ -712,7 +710,8 @@ class ProfileStats(object): ...@@ -712,7 +710,8 @@ class ProfileStats(object):
for var in fgraph.variables: for var in fgraph.variables:
viewed_by[var] = [] viewed_by[var] = []
view_of = {} # {var1: original var viewed by var1} view_of = {} # {var1: original var viewed by var1}
# The orignal mean that we don't keep trac of all the intermediate relationship in the view. # The orignal mean that we don't keep trac of all the intermediate
# relationship in the view.
for node in order: for node in order:
for var in node.outputs: for var in node.outputs:
...@@ -738,12 +737,14 @@ class ProfileStats(object): ...@@ -738,12 +737,14 @@ class ProfileStats(object):
ins = None ins = None
if dmap and idx2 in dmap: if dmap and idx2 in dmap:
vidx = dmap[idx2] vidx = dmap[idx2]
assert len(vidx) == 1, "Here we only support the possibility to destroy one input" assert len(
vidx) == 1, "Here we only support the possibility to destroy one input"
ins = node.inputs[vidx[0]] ins = node.inputs[vidx[0]]
if vmap and idx2 in vmap: if vmap and idx2 in vmap:
assert ins is None assert ins is None
vidx = vmap[idx2] vidx = vmap[idx2]
assert len(vidx) == 1, "Here we only support the possibility to view one input" assert len(
vidx) == 1, "Here we only support the possibility to view one input"
ins = node.inputs[vidx[0]] ins = node.inputs[vidx[0]]
if ins is not None: if ins is not None:
# This is needed for destroy_map in case it # This is needed for destroy_map in case it
...@@ -859,12 +860,14 @@ class ProfileStats(object): ...@@ -859,12 +860,14 @@ class ProfileStats(object):
ins = None ins = None
if dmap and idx in dmap: if dmap and idx in dmap:
vidx = dmap[idx] vidx = dmap[idx]
assert len(vidx) == 1, "Here we only support the possibility to destroy one input" assert len(
vidx) == 1, "Here we only support the possibility to destroy one input"
ins = node.inputs[vidx[0]] ins = node.inputs[vidx[0]]
if vmap and idx in vmap: if vmap and idx in vmap:
assert ins is None assert ins is None
vidx = vmap[idx] vidx = vmap[idx]
assert len(vidx) == 1, "Here we only support the possibility to destroy one input" assert len(
vidx) == 1, "Here we only support the possibility to destroy one input"
ins = node.inputs[vidx[0]] ins = node.inputs[vidx[0]]
if ins is not None: if ins is not None:
# This is needed for destroy_map in case it # This is needed for destroy_map in case it
...@@ -889,7 +892,8 @@ class ProfileStats(object): ...@@ -889,7 +892,8 @@ class ProfileStats(object):
for ins in node.inputs: for ins in node.inputs:
assert not (ins in view_of and assert not (ins in view_of and
viewed_by[ins]) viewed_by[ins])
# We track of the original var, so this shouldn't happen # We track of the original var, so this shouldn't
# happen
if (dependencies[ins] and if (dependencies[ins] and
ins not in fgraph.outputs and ins not in fgraph.outputs and
ins.owner and ins.owner and
...@@ -913,7 +917,12 @@ class ProfileStats(object): ...@@ -913,7 +917,12 @@ class ProfileStats(object):
done_set.add(node) done_set.add(node)
frozen_set = frozenset(done_set) frozen_set = frozenset(done_set)
if done_dict.get(frozen_set, max_mem_count+1) > max_mem_count: if done_dict.get(frozen_set, max_mem_count + 1) > max_mem_count:
# check if frozen_set is in done_set
# no, add it to done_set
# yes, then compare the past mem and current mem
# bigger, update the value and continue
# smaller, stop this iteration, move to next node
done_dict[frozen_set] = max_mem_count done_dict[frozen_set] = max_mem_count
for var in node.outputs: for var in node.outputs:
...@@ -928,7 +937,8 @@ class ProfileStats(object): ...@@ -928,7 +937,8 @@ class ProfileStats(object):
if max_mem_count < mem_bound: if max_mem_count < mem_bound:
mem_bound = max_mem_count mem_bound = max_mem_count
else: else:
min_memory_generator(new_exec_nodes, viewed_by, view_of) min_memory_generator(
new_exec_nodes, viewed_by, view_of)
# Reset track variables # Reset track variables
done_set.remove(node) done_set.remove(node)
...@@ -949,7 +959,6 @@ class ProfileStats(object): ...@@ -949,7 +959,6 @@ class ProfileStats(object):
for k in viewof_change: for k in viewof_change:
del view_of[k] del view_of[k]
# two data structure used to mimic Python gc # two data structure used to mimic Python gc
viewed_by = {} # {var1: [vars that view var1]} viewed_by = {} # {var1: [vars that view var1]}
# The len of the list is the value of python ref count. But we use a list, not just the ref count value. # The len of the list is the value of python ref count. But we use a list, not just the ref count value.
...@@ -957,7 +966,8 @@ class ProfileStats(object): ...@@ -957,7 +966,8 @@ class ProfileStats(object):
for var in fgraph.variables: for var in fgraph.variables:
viewed_by[var] = [] viewed_by[var] = []
view_of = {} # {var1: original var viewed by var1} view_of = {} # {var1: original var viewed by var1}
# The orignal mean that we don't keep trac of all the intermediate relationship in the view. # The orignal mean that we don't keep trac of all the intermediate
# relationship in the view.
min_memory_generator(executable_nodes, viewed_by, view_of) min_memory_generator(executable_nodes, viewed_by, view_of)
...@@ -1039,7 +1049,8 @@ class ProfileStats(object): ...@@ -1039,7 +1049,8 @@ class ProfileStats(object):
round(new_max_node_memory_saved_by_view / 1024.)), int( round(new_max_node_memory_saved_by_view / 1024.)), int(
round(max_node_memory_saved_by_view / 1024.))) round(max_node_memory_saved_by_view / 1024.)))
print >> file, " Memory saved if inplace ops are used: %dKB (%dKB)" % \ print >> file, " Memory saved if inplace ops are used: %dKB (%dKB)" % \
(int(round(new_max_node_memory_saved_by_inplace / 1024.)), int(round(max_node_memory_saved_by_inplace / 1024.))) (int(round(new_max_node_memory_saved_by_inplace / 1024.)),
int(round(max_node_memory_saved_by_inplace / 1024.)))
print >> file, " Memory saved if gc is enabled: %dKB (%dKB)" % (int( print >> file, " Memory saved if gc is enabled: %dKB (%dKB)" % (int(
round(new_max_node_memory_size - new_max_running_max_memory_size) / 1024.), int( round(new_max_node_memory_size - new_max_running_max_memory_size) / 1024.), int(
round(max_node_memory_size - max_running_max_memory_size) / 1024.)) round(max_node_memory_size - max_running_max_memory_size) / 1024.))
...@@ -1112,7 +1123,6 @@ class ProfileStats(object): ...@@ -1112,7 +1123,6 @@ class ProfileStats(object):
" emitted in those cases.") " emitted in those cases.")
print >> file, '' print >> file, ''
def summary(self, file=sys.stderr, n_ops_to_print=20, def summary(self, file=sys.stderr, n_ops_to_print=20,
n_apply_to_print=20): n_apply_to_print=20):
self.summary_function(file) self.summary_function(file)
...@@ -1133,8 +1143,6 @@ class ProfileStats(object): ...@@ -1133,8 +1143,6 @@ class ProfileStats(object):
self.optimizer_profile[1]) self.optimizer_profile[1])
if 0: # old code still to be ported from ProfileMode if 0: # old code still to be ported from ProfileMode
def long_print(self, file=sys.stderr, fct_name=None, message=None, def long_print(self, file=sys.stderr, fct_name=None, message=None,
n_apply_to_print=15, n_ops_to_print=20, print_apply=False): n_apply_to_print=15, n_ops_to_print=20, print_apply=False):
...@@ -1159,7 +1167,7 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1159,7 +1167,7 @@ if 0: # old code still to be ported from ProfileMode
sop_time = {} sop_time = {}
sop_call = {} sop_call = {}
sop_op = {} sop_op = {}
#map each op class to Bool. True iff all applies were done in c. # map each op class to Bool. True iff all applies were done in c.
sop_c = {} sop_c = {}
for a, t in op_time.items(): for a, t in op_time.items():
typ = type(a) typ = type(a)
...@@ -1186,7 +1194,7 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1186,7 +1194,7 @@ if 0: # old code still to be ported from ProfileMode
msg = '*' msg = '*'
else: else:
msg = ' ' msg = ' '
print ' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %5d %2d %s' % (f, ftot, t, tot, t/nb_call, msg, nb_call, nb_op, a) print ' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %5d %2d %s' % (f, ftot, t, tot, t / nb_call, msg, nb_call, nb_op, a)
print ' ... (remaining %i Ops account for %.2f%%(%.2fs) of the runtime)'\ print ' ... (remaining %i Ops account for %.2f%%(%.2fs) of the runtime)'\
% (max(0, len(sotimes) - n_ops_to_print), % (max(0, len(sotimes) - n_ops_to_print),
sum(f for f, t, a, ci, nb_call, nb_op in sum(f for f, t, a, ci, nb_call, nb_op in
...@@ -1202,13 +1210,13 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1202,13 +1210,13 @@ if 0: # old code still to be ported from ProfileMode
print 'Theano fct summary: <% total fct time> <total time> <time per call> <nb call> <fct name>' print 'Theano fct summary: <% total fct time> <total time> <time per call> <nb call> <fct name>'
for key in fct_call.keys(): for key in fct_call.keys():
if fct_call[key] > 0: if fct_call[key] > 0:
print ' %4.1f%% %.3fs %.2es %d %s'%( print ' %4.1f%% %.3fs %.2es %d %s' % (
fct_call_time[key] / total_fct_time * 100, fct_call_time[key] / total_fct_time * 100,
fct_call_time[key], fct_call_time[key],
fct_call_time[key] / fct_call[key], fct_call_time[key] / fct_call[key],
fct_call[key], key.name) fct_call[key], key.name)
else: else:
print ' NOT CALLED',key.name print ' NOT CALLED', key.name
if total_fct_time > 0: if total_fct_time > 0:
time_pr_in_fct = local_time / total_fct_time * 100 time_pr_in_fct = local_time / total_fct_time * 100
...@@ -1227,8 +1235,8 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1227,8 +1235,8 @@ if 0: # old code still to be ported from ProfileMode
print (' Theano Op time (included in fct call, Time spent ' print (' Theano Op time (included in fct call, Time spent '
'running thunks) %.3fs %.1f%%(of total) %.1f%%(of fct call)' % 'running thunks) %.3fs %.1f%%(of total) %.1f%%(of fct call)' %
(local_time, local_time / total_time * 100, time_pr_in_fct)) (local_time, local_time / total_time * 100, time_pr_in_fct))
print 'Other time since import %.3fs %.1f%%'%(other_time,other_time/total_time*100) print 'Other time since import %.3fs %.1f%%' % (other_time, other_time / total_time * 100)
print '%i Theano fct call, %.3fs per call'%(total_fct_call, time_per_call) print '%i Theano fct call, %.3fs per call' % (total_fct_call, time_per_call)
print print
print "List of apply that don't have float64 as input but have float64 in outputs. Usefull to know if we forgot some cast when using floatX=float32 or gpu code." print "List of apply that don't have float64 as input but have float64 in outputs. Usefull to know if we forgot some cast when using floatX=float32 or gpu code."
...@@ -1236,7 +1244,7 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1236,7 +1244,7 @@ if 0: # old code still to be ported from ProfileMode
for fct in fct_call.keys(): for fct in fct_call.keys():
for idx, node in enumerate(fct.maker.fgraph.toposort()): for idx, node in enumerate(fct.maker.fgraph.toposort()):
if any(hasattr(i, 'dtype') and i.dtype == 'float64' for i in node.outputs) and not any(hasattr(i, 'dtype') and i.dtype == 'float64' for i in node.inputs): if any(hasattr(i, 'dtype') and i.dtype == 'float64' for i in node.outputs) and not any(hasattr(i, 'dtype') and i.dtype == 'float64' for i in node.inputs):
print str(node), idx, fct.name, str([getattr(i,'dtype',None) for i in node.inputs]),str([getattr(i,'dtype',None) for i in node.outputs]) print str(node), idx, fct.name, str([getattr(i, 'dtype', None) for i in node.inputs]), str([getattr(i, 'dtype', None) for i in node.outputs])
if any([x[2].__name__.startswith("Gpu") for x in sotimes]): if any([x[2].__name__.startswith("Gpu") for x in sotimes]):
cpu = [] cpu = []
...@@ -1254,8 +1262,8 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1254,8 +1262,8 @@ if 0: # old code still to be ported from ProfileMode
sum_trans = sum(so[1] for so in trans) sum_trans = sum(so[1] for so in trans)
print print
print "Spent %.3fs(%.3f%%) in cpu Op, %.3fs(%.3f%%) in gpu Op and %.3fs(%.3f%%) transfert Op"%( print "Spent %.3fs(%.3f%%) in cpu Op, %.3fs(%.3f%%) in gpu Op and %.3fs(%.3f%%) transfert Op" % (
sum_cpu, sum_cpu/local_time*100, sum_gpu, sum_gpu/local_time*100, sum_trans, sum_trans/local_time*100) sum_cpu, sum_cpu / local_time * 100, sum_gpu, sum_gpu / local_time * 100, sum_trans, sum_trans / local_time * 100)
print "Theano function input that are float64" print "Theano function input that are float64"
print "<fct name> <input name> <input type> <str input>" print "<fct name> <input name> <input type> <str input>"
...@@ -1270,20 +1278,25 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1270,20 +1278,25 @@ if 0: # old code still to be ported from ProfileMode
from theano.tensor.raw_random import RandomFunction from theano.tensor.raw_random import RandomFunction
import theano import theano
import theano.scalar as scal import theano.scalar as scal
scalar_op_amdlibm_no_speed_up = [scal.LT, scal.GT, scal.LE, scal.GE, scal.EQ, scal.NEQ, scal.InRange, scal.Switch, scal.OR, scal.XOR, scal.AND, scal.Invert, scal.Maximum, scal.Minimum, scal.Add, scal.Mul, scal.Sub, scal.TrueDiv, scal.IntDiv, scal.Clip, scal.First, scal.Second, scal.Identity, scal.Cast, scal.Sgn, scal.Neg, scal.Inv, scal.Sqr ] scalar_op_amdlibm_no_speed_up = [scal.LT, scal.GT, scal.LE, scal.GE, scal.EQ, scal.NEQ, scal.InRange, scal.Switch, scal.OR, scal.XOR, scal.AND, scal.Invert, scal.Maximum,
scalar_op_amdlibm_speed_up = [scal.Mod, scal.Pow, scal.Ceil, scal.Floor, scal.RoundHalfToEven, scal.RoundHalfAwayFromZero, scal.Log, scal.Log2, scal.Log10, scal.Log1p, scal.Exp, scal.Sqrt, scal.Abs, scal.Cos, scal.Sin, scal.Tan, scal.Tanh, scal.Cosh, scal.Sinh, T.nnet.sigm.ScalarSigmoid, T.nnet.sigm.ScalarSoftplus ]#Abs, Mod in float{32,64} only scal.Minimum, scal.Add, scal.Mul, scal.Sub, scal.TrueDiv, scal.IntDiv, scal.Clip, scal.First, scal.Second, scal.Identity, scal.Cast, scal.Sgn, scal.Neg, scal.Inv, scal.Sqr]
scalar_op_amdlibm_speed_up = [scal.Mod, scal.Pow, scal.Ceil, scal.Floor, scal.RoundHalfToEven, scal.RoundHalfAwayFromZero, scal.Log, scal.Log2, scal.Log10, scal.Log1p, scal.Exp,
scal.Sqrt, scal.Abs, scal.Cos, scal.Sin, scal.Tan, scal.Tanh, scal.Cosh, scal.Sinh, T.nnet.sigm.ScalarSigmoid, T.nnet.sigm.ScalarSoftplus] # Abs, Mod in float{32,64} only
def get_scalar_ops(s): def get_scalar_ops(s):
if isinstance(s, theano.scalar.Composite): if isinstance(s, theano.scalar.Composite):
l = [] l = []
for node in s.fgraph.toposort(): for node in s.fgraph.toposort():
l+=get_scalar_ops(node.op) l += get_scalar_ops(node.op)
return l return l
else: return [s] else:
return [s]
def list_scalar_op(op): def list_scalar_op(op):
if isinstance(op.scalar_op, theano.scalar.Composite): if isinstance(op.scalar_op, theano.scalar.Composite):
return get_scalar_ops(op.scalar_op) return get_scalar_ops(op.scalar_op)
else: return [op.scalar_op] else:
return [op.scalar_op]
def amdlibm_speed_up(op): def amdlibm_speed_up(op):
if not isinstance(op, T.Elemwise): if not isinstance(op, T.Elemwise):
...@@ -1296,6 +1309,7 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1296,6 +1309,7 @@ if 0: # old code still to be ported from ProfileMode
elif s_op.__class__ not in scalar_op_amdlibm_no_speed_up: elif s_op.__class__ not in scalar_op_amdlibm_no_speed_up:
print "We don't know if amdlibm will accelerate this scalar op.", s_op print "We don't know if amdlibm will accelerate this scalar op.", s_op
return False return False
def exp_float32_op(op): def exp_float32_op(op):
if not isinstance(op, T.Elemwise): if not isinstance(op, T.Elemwise):
return False return False
...@@ -1303,19 +1317,19 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1303,19 +1317,19 @@ if 0: # old code still to be ported from ProfileMode
l = list_scalar_op(op) l = list_scalar_op(op)
return any([s_op.__class__ in [scal.Exp] for s_op in l]) return any([s_op.__class__ in [scal.Exp] for s_op in l])
#tip 1 # tip 1
if config.floatX=='float64': if config.floatX == 'float64':
print " - Try the Theano flag floatX=float32" print " - Try the Theano flag floatX=float32"
#tip 2 # tip 2
if not config.lib.amdlibm and any([amdlibm_speed_up(a.op) for i,a in apply_time]): if not config.lib.amdlibm and any([amdlibm_speed_up(a.op) for i, a in apply_time]):
print " - Try installing amdlibm and set the Theano flag lib.amdlibm=True. This speed up only some Elemwise operation." print " - Try installing amdlibm and set the Theano flag lib.amdlibm=True. This speed up only some Elemwise operation."
#tip 3 # tip 3
if not config.lib.amdlibm and any([exp_float32_op(a.op) and a.inputs[0].dtype=='float32' for i,a in apply_time]): if not config.lib.amdlibm and any([exp_float32_op(a.op) and a.inputs[0].dtype == 'float32' for i, a in apply_time]):
print " - With the default gcc libm, exp in float32 is slower than in float64! Try Theano flags floatX=float64 or install amdlibm and set the theano flags lib.amdlibm=True" print " - With the default gcc libm, exp in float32 is slower than in float64! Try Theano flags floatX=float64 or install amdlibm and set the theano flags lib.amdlibm=True"
#tip 4 # tip 4
for a, t in apply_time.iteritems(): for a, t in apply_time.iteritems():
node = a node = a
if (isinstance(node.op, T.Dot) and if (isinstance(node.op, T.Dot) and
...@@ -1326,7 +1340,7 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1326,7 +1340,7 @@ if 0: # old code still to be ported from ProfileMode
"Currently they are: %s" % "Currently they are: %s" %
[i.type for i in node.inputs]) [i.type for i in node.inputs])
#tip 5 # tip 5
for a, t in apply_time.iteritems(): for a, t in apply_time.iteritems():
node = a node = a
if isinstance(node.op, RandomFunction): if isinstance(node.op, RandomFunction):
...@@ -1414,7 +1428,7 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1414,7 +1428,7 @@ if 0: # old code still to be ported from ProfileMode
tb = b_time.pop(a, 0) tb = b_time.pop(a, 0)
r[a] += ta - tb r[a] += ta - tb
#they are missing in a # they are missing in a
for a, t in b_time.items(): for a, t in b_time.items():
r.setdefault(a, 0) r.setdefault(a, 0)
r[a] += t r[a] += t
...@@ -1435,8 +1449,6 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1435,8 +1449,6 @@ if 0: # old code still to be ported from ProfileMode
n_ops_to_print=n_ops_to_print, print_apply=False) n_ops_to_print=n_ops_to_print, print_apply=False)
class ScanProfileStats(ProfileStats): class ScanProfileStats(ProfileStats):
callcount = 0.0 callcount = 0.0
nbsteps = 0.0 nbsteps = 0.0
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论