提交 5529c62f 作者: Frederic Bastien 提交者: Frederic

Print the flops with the memory profiler.

上级 11c4882a
......@@ -532,6 +532,12 @@ class ProfileStats(object):
hs += ['<id>']
es += ['%3d']
if self.variable_shape:
hs += ['<Mflops>']
es += ['%.1f']
else:
es += ['%s']
upto_length = numpy.sum([len(x) for x in hs]) + len(hs)
maxlen = self.line_width - upto_length
hs += ['<Apply name>']
......@@ -557,8 +563,19 @@ class ProfileStats(object):
ftot = tot * 100 / local_time
if nb_call == 0:
continue
if not self.variable_shape:
flops = ""
elif hasattr(a.op, 'flops'):
flops = a.op.flops([self.variable_shape[var]
for var in a.inputs],
[self.variable_shape[var]
for var in a.outputs])
flops = flops/1024./1024
else:
flops = -1
print >> file, format_str %(f, ftot, t, t / nb_call, nb_call,
nd_id,
flops,
str(a)[:maxlen])
if not config.profile_memory:
continue
......
......@@ -605,6 +605,25 @@ class ConvOp(OpenMPOp):
self.kshp[0] * self.kshp[1] * \
self.imshp[1] * self.imshp[2] * 2
def flops(self, inputs, outputs):
    """Return the estimated number of flops performed by this convolution.

    Used by the profiler to print a Mflops column next to each apply node.

    :param inputs: pair ``(images, kerns)`` of shapes, each indexed from
        0 to 3 here.  Presumably ``(batch, stack, rows, cols)`` for the
        images and ``(nkern, stack, rows, cols)`` for the kernels --
        TODO confirm against the caller.
    :param outputs: one-element sequence holding the output shape; only
        indices 2 and 3 of it are read (in "valid" mode).
    :return: the operation count, where every multiplication and every
        addition counts as one flop.
    :raises AssertionError: if index 1 of the image shape and of the
        kernel shape differ.
    """
    images, kerns = inputs
    out, = outputs
    assert images[1] == kerns[1], (
        "images and kernels disagree on dimension 1: %s != %s"
        % (images[1], kerns[1]))

    if self.out_mode == "valid":
        # One multiplication and one addition per kernel element, for
        # each output pixel.
        per_pixel = kerns[2] * kerns[3] * 2
        # Flops for one output image.
        per_image = per_pixel * out[2] * out[3]
        # Repeated for every (stack, kernel, batch image) combination.
        return per_image * images[1] * kerns[0] * images[0]

    # NOTE(review): there is no exact count for the other modes.  This
    # formula uses the input image size (images[2] * images[3]) where the
    # "valid" branch uses the output size -- presumably an approximation;
    # confirm before relying on the number.
    return (images[0] * kerns[0] * images[1] *
            kerns[2] * kerns[3] *
            images[2] * images[3] * 2)
def make_node(self, inputs, kerns):
# TODO: find a way to make ConvOp work for N-D (after NIPS09)
"""
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论