提交 436ce734 authored 作者: Frederic's avatar Frederic

Make GPUConv compute the flops used.

上级 7dd9f409
......@@ -621,6 +621,25 @@ class GpuConv(GpuOp):
False, False]
return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()])
def flops(self, inputs, outputs):
    """Estimate the FLOP count of this convolution from shape tuples.

    Useful with the hack in profilemode to print the MFlops.

    :param inputs: pair of shape tuples ``(image_shape, kernel_shape)``,
        each ordered ``(batch, stack, rows, cols)``.
    :param outputs: one-element list with the output shape tuple.
    :return: total number of floating-point operations (mul + add).
    """
    img_shp, kern_shp = inputs
    out_shp, = outputs
    # The stack (channel) dimensions of images and kernels must agree.
    assert img_shp[1] == kern_shp[1]
    if self.out_mode == "valid":
        # One multiply and one add per kernel tap, per output pixel.
        per_pixel = 2 * kern_shp[2] * kern_shp[3]
        # FLOPs for a single output image.
        per_image = per_pixel * out_shp[2] * out_shp[3]
        # Scale by stack size, number of kernels and batch size
        # (n_stack == self.imshp[0]).
        return per_image * img_shp[1] * kern_shp[0] * img_shp[0]
    # Other modes (e.g. "full"): approximate using the input image area.
    return (2 * img_shp[0] * kern_shp[0] * img_shp[1] *
            kern_shp[2] * kern_shp[3] *
            img_shp[2] * img_shp[3])
def make_thunk(self, node, storage_map, compute_map, no_recycling):
node_ = copy.copy(node)
assert node.op is node_.op
......
......@@ -578,7 +578,7 @@ class ConvOp(OpenMPOp):
flops *= out[2] * out[3]
# for all outputs images#n_stack==self.imshp[0]
flops *= images[1] * kerns[0] * images[0]
else: # full mode not implemented
else:
flops = (images[0] * kerns[0] * images[1] *
kerns[2] * kerns[3] *
images[2] * images[3] * 2)
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论