Print the flops with the memory profiler.

5529c62f · Frederic Bastien · Frederic · 11c4882a · 5529c62f · 5529c62f
--- a/theano/compile/profiling.py
+++ b/theano/compile/profiling.py
@@ -532,6 +532,12 @@ class ProfileStats(object):
        hs += ['<id>']
        es += ['%3d']

+        if self.variable_shape:
+            hs += ['<Mflops>']
+            es += ['%.1f']
+        else:
+            es += ['%s']
+
        upto_length = numpy.sum([len(x) for x in hs]) + len(hs)
        maxlen = self.line_width - upto_length
        hs += ['<Apply name>']
@@ -557,8 +563,19 @@ class ProfileStats(object):
            ftot = tot * 100 / local_time
            if nb_call == 0:
                continue
+            if not self.variable_shape:
+                flops = ""
+            elif hasattr(a.op, 'flops'):
+                flops = a.op.flops([self.variable_shape[var]
+                                    for var in a.inputs],
+                                   [self.variable_shape[var]
+                                    for var in a.outputs])
+                flops = flops/1024./1024
+            else:
+                flops = -1
            print >> file, format_str %(f, ftot, t, t / nb_call, nb_call,
                                        nd_id,
+                                        flops,
                                        str(a)[:maxlen])
            if not config.profile_memory:
                continue

--- a/theano/tensor/nnet/conv.py
+++ b/theano/tensor/nnet/conv.py
@@ -605,6 +605,25 @@ class ConvOp(OpenMPOp):
                    self.kshp[0] * self.kshp[1] * \
                        self.imshp[1] * self.imshp[2] * 2

+    def flops(self, inputs, outputs):
+        """Flop count for the given shapes; the profiler prints it as Mflops."""
+        images, kerns = inputs
+        out, = outputs
+        assert images[1] == kerns[1]  # images and kernels must agree on entry 1
+        flops = 0
+        if self.out_mode == "valid":
+            # number of multiplies and adds per output pixel
+            flops = kerns[2] * kerns[3] * 2
+            # number of flops per output image
+            flops *= out[2] * out[3]
+            # for all output images (n_stack == self.imshp[0])
+            flops *= images[1] * kerns[0] * images[0]
+        else:  # other modes: no exact count implemented; uses the input size
+            flops = (images[0] * kerns[0] * images[1] *
+                     kerns[2] * kerns[3] *
+                     images[2] * images[3] * 2)
+        return flops
+
    def make_node(self, inputs, kerns):
        # TODO: find a way to make ConvOp work for N-D (after NIPS09)
        """