in profile mode, print the time per op call.

487bfc2b · Frederic Bastien · 14fd48fc · 487bfc2b
--- a/theano/compile/profilemode.py
+++ b/theano/compile/profilemode.py
@@ -182,14 +182,14 @@ class ProfileMode(Mode):
        print 'local_time %fs (Time spent running thunks)'% local_time

        if print_apply:
-            print 'Apply-wise summary: <% of local_time spent at this position> <total of local_time spent at this position> <nb_call> <Apply position> <Apply Op name>'
+            print 'Apply-wise summary: <% of local_time spent at this position> <cumulative seconds> <apply time> <time per call> <nb_call> <Apply position> <Apply Op name>'
            atimes = [(t/local_time, t, (a[0], str(a[1])), apply_call[a]) for a, t in apply_time.items()]
            atimes.sort()
            atimes.reverse()
            tot=0
            for f,t,a,nb_call in atimes[:n_apply_to_print]:
                tot+=t
-                print '   %4.1f%%  %.3fs  %.3fs  %i  %i %s' % (f*100, tot, t, nb_call, a[0], a[1])
+                print '   %4.1f%%  %.3fs  %.3fs %.2es  %i  %i %s' % (f*100, tot, t, t/nb_call,nb_call, a[0], a[1])
            print '   ... (remaining %i Apply instances account for %.2f%%(%.2fs) of the runtime)'\
                    %(max(0, len(atimes)-n_apply_to_print),
                      sum(f for f, t, a, nb_call in atimes[n_apply_to_print:])*100,
@@ -199,7 +199,7 @@ class ProfileMode(Mode):
        if op_flops:
            flops_msg=' <MFlops/s>'
            print '\nHACK WARNING: we print the flops for some OP, but the logic don\' always work. You need to know the internal of Theano to make it work correctly. Otherwise don\'t use!'
-        print '\nOp-wise summary: < of local_time spent on this kind of Op> <cumulative seconds> <self seconds>%s <nb_call> <Op name>'%(flops_msg)
+        print '\nOp-wise summary: <%% of local_time spent on this kind of Op> <cumulative seconds> <self seconds> <time per call> %s <nb_call> <Op name>'%(flops_msg)

        otimes = [(t/local_time, t, a, op_cimpl[a], op_call[a]) for a, t in op_time.items()]
        otimes.sort()
@@ -212,9 +212,9 @@ class ProfileMode(Mode):
            else:
              msg = ' '
            if op_flops:
-                print '   %4.1f%%  %.3fs  %.3fs  %s %7.1f %d %s' % (f*100, tot, t, msg, op_flops.get(a,-1), nb_call, a)
+                print '   %4.1f%%  %.3fs  %.3fs  %.2es %s %7.1f %d %s' % (f*100, tot, t, t/nb_call, msg, op_flops.get(a,-1), nb_call, a)
            else:
-                print '   %4.1f%%  %.3fs  %.3fs  %s %d %s' % (f*100, tot, t, msg, nb_call, a)
+                print '   %4.1f%%  %.3fs  %.3fs  %.2es  %s %d %s' % (f*100, tot, t, t/nb_call, msg, nb_call, a)
        print '   ... (remaining %i Ops account for %6.2f%%(%.2fs) of the runtime)'\
                %(max(0, len(otimes)-n_ops_to_print),
                  sum(f for f, t, a, ci, nb_call in otimes[n_ops_to_print:])*100,
@@ -231,7 +231,7 @@ class ProfileMode(Mode):
            sop_c.setdefault(type(a),True)
            sop_c[type(a)]=sop_c[type(a)] and op_cimpl[a]
            sop_call[type(a)]=sop_call.get(type(a),0)+op_call[a]
-        print '\nSingle Op-wise summary: <% of local_time spent on this kind of Op> <cumulative seconds> <self seconds> <nb_call> <Op name>'
+        print '\nSingle Op-wise summary: <% of local_time spent on this kind of Op> <cumulative seconds> <self seconds> <time per call> <nb_call> <Op name>'
        sotimes = [(t/local_time, t, a, sop_c[a], sop_call[a]) for a, t in sop_time.items()]
        sotimes.sort()
        sotimes.reverse()
@@ -242,7 +242,7 @@ class ProfileMode(Mode):
              msg = '*'
            else:
              msg = ' '
-            print '   %4.1f%%  %.3fs  %.3fs  %s %d %s' % (f*100, tot, t, msg, nb_call, a)
+            print '   %4.1f%%  %.3fs  %.3fs  %.2es %s %d %s' % (f*100, tot, t, t/nb_call, msg, nb_call, a)
        print '   ... (remaining %i Ops account for %.2f%%(%.2fs) of the runtime)'\
                %(max(0, len(sotimes)-n_ops_to_print),
                  sum(f for f, t, a, ci, nb_call in sotimes[n_ops_to_print:])*100,