提交 089fe03d authored 作者: Roy Xue's avatar Roy Xue

Add comments and code style fix for the whole file

上级 92306ee6
...@@ -93,7 +93,7 @@ def _atexit_print_fn(): ...@@ -93,7 +93,7 @@ def _atexit_print_fn():
if not isinstance(ps, ScanProfileStats): if not isinstance(ps, ScanProfileStats):
to_sum.append(ps) to_sum.append(ps)
else: else:
#TODO print the name if there is one! # TODO print the name if there is one!
print 'Skipping empty Profile' print 'Skipping empty Profile'
if len(to_sum) > 1: if len(to_sum) > 1:
# Make a global profile # Make a global profile
...@@ -107,7 +107,7 @@ def _atexit_print_fn(): ...@@ -107,7 +107,7 @@ def _atexit_print_fn():
"validate_time"]: "validate_time"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr)) setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
#merge dictonary # merge dictonary
for attr in ["apply_time", "apply_callcount", for attr in ["apply_time", "apply_callcount",
"apply_cimpl", "variable_shape", "variable_strides"]: "apply_cimpl", "variable_shape", "variable_strides"]:
cum_attr = getattr(cum, attr) cum_attr = getattr(cum, attr)
...@@ -128,10 +128,8 @@ def _atexit_print_fn(): ...@@ -128,10 +128,8 @@ def _atexit_print_fn():
n_apply_to_print=config.profiling.n_apply) n_apply_to_print=config.profiling.n_apply)
class ProfileStats(object): class ProfileStats(object):
""" """
Object to store runtime and memory profiling information for all of Object to store runtime and memory profiling information for all of
Theano's operations: compilation, optimization, execution. Theano's operations: compilation, optimization, execution.
...@@ -572,12 +570,12 @@ class ProfileStats(object): ...@@ -572,12 +570,12 @@ class ProfileStats(object):
for var in a.inputs], for var in a.inputs],
[self.variable_shape[var] [self.variable_shape[var]
for var in a.outputs]) for var in a.outputs])
flops = '%8.1f' % (fl/1024./1024) flops = '%8.1f' % (fl / 1024. / 1024)
flops_s = '%10.1f' % (fl/1024./1024/1024/t) flops_s = '%10.1f' % (fl / 1024. / 1024 / 1024 / t)
else: else:
flops = " " flops = " "
flops_s = " " flops_s = " "
print >> file, format_str %(f, ftot, t, t / nb_call, nb_call, print >> file, format_str % (f, ftot, t, t / nb_call, nb_call,
nd_id, nd_id,
flops, flops_s, flops, flops_s,
str(a)[:maxlen]) str(a)[:maxlen])
...@@ -597,7 +595,7 @@ class ProfileStats(object): ...@@ -597,7 +595,7 @@ class ProfileStats(object):
idx, dtype, sh, st) idx, dtype, sh, st)
# Same as before, this I've sacrificied some information making # Same as before, this I've sacrificied some information making
# the output more readable # the output more readable
#print >> file, ' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %i %s'%( # print >> file, ' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %i %s'%(
# f, ftot, t, tot, t/nb_call,nb_call, str(a)) # f, ftot, t, tot, t/nb_call,nb_call, str(a))
print >> file, ' ... (remaining %i Apply instances account for %.2f%%(%.2fs) of the runtime)'\ print >> file, ' ... (remaining %i Apply instances account for %.2f%%(%.2fs) of the runtime)'\
% (max(0, len(atimes) - N), % (max(0, len(atimes) - N),
...@@ -618,7 +616,7 @@ class ProfileStats(object): ...@@ -618,7 +616,7 @@ class ProfileStats(object):
local_time = sum(self.apply_time.values()) local_time = sum(self.apply_time.values())
if local_time > 0: if local_time > 0:
print >> file, ' Time in thunks: %es (%.3f%%)' % ( print >> file, ' Time in thunks: %es (%.3f%%)' % (
local_time, 100*local_time / self.fct_call_time) local_time, 100 * local_time / self.fct_call_time)
print >> file, ' Total compile time: %es' % self.compile_time print >> file, ' Total compile time: %es' % self.compile_time
print >> file, ' Number of Apply nodes: %s' % len(self.apply_time) print >> file, ' Number of Apply nodes: %s' % len(self.apply_time)
print >> file, ' Theano Optimizer time: %es' % self.optimizer_time print >> file, ' Theano Optimizer time: %es' % self.optimizer_time
...@@ -656,7 +654,7 @@ class ProfileStats(object): ...@@ -656,7 +654,7 @@ class ProfileStats(object):
fct_shapes[node.fgraph][node].append(sh) fct_shapes[node.fgraph][node].append(sh)
node_mem[node] = sum_dense node_mem[node] = sum_dense
#Find the function that used the most of that statistic # Find the function that used the most of that statistic
max_sum_size = 0 max_sum_size = 0
max_node_memory_size = 0 max_node_memory_size = 0
max_running_max_memory_size = 0 max_running_max_memory_size = 0
...@@ -712,7 +710,8 @@ class ProfileStats(object): ...@@ -712,7 +710,8 @@ class ProfileStats(object):
for var in fgraph.variables: for var in fgraph.variables:
viewed_by[var] = [] viewed_by[var] = []
view_of = {} # {var1: original var viewed by var1} view_of = {} # {var1: original var viewed by var1}
# The orignal mean that we don't keep trac of all the intermediate relationship in the view. # The orignal mean that we don't keep trac of all the intermediate
# relationship in the view.
for node in order: for node in order:
for var in node.outputs: for var in node.outputs:
...@@ -738,12 +737,14 @@ class ProfileStats(object): ...@@ -738,12 +737,14 @@ class ProfileStats(object):
ins = None ins = None
if dmap and idx2 in dmap: if dmap and idx2 in dmap:
vidx = dmap[idx2] vidx = dmap[idx2]
assert len(vidx) == 1, "Here we only support the possibility to destroy one input" assert len(
vidx) == 1, "Here we only support the possibility to destroy one input"
ins = node.inputs[vidx[0]] ins = node.inputs[vidx[0]]
if vmap and idx2 in vmap: if vmap and idx2 in vmap:
assert ins is None assert ins is None
vidx = vmap[idx2] vidx = vmap[idx2]
assert len(vidx) == 1, "Here we only support the possibility to view one input" assert len(
vidx) == 1, "Here we only support the possibility to view one input"
ins = node.inputs[vidx[0]] ins = node.inputs[vidx[0]]
if ins is not None: if ins is not None:
# This is needed for destroy_map in case it # This is needed for destroy_map in case it
...@@ -859,12 +860,14 @@ class ProfileStats(object): ...@@ -859,12 +860,14 @@ class ProfileStats(object):
ins = None ins = None
if dmap and idx in dmap: if dmap and idx in dmap:
vidx = dmap[idx] vidx = dmap[idx]
assert len(vidx) == 1, "Here we only support the possibility to destroy one input" assert len(
vidx) == 1, "Here we only support the possibility to destroy one input"
ins = node.inputs[vidx[0]] ins = node.inputs[vidx[0]]
if vmap and idx in vmap: if vmap and idx in vmap:
assert ins is None assert ins is None
vidx = vmap[idx] vidx = vmap[idx]
assert len(vidx) == 1, "Here we only support the possibility to destroy one input" assert len(
vidx) == 1, "Here we only support the possibility to destroy one input"
ins = node.inputs[vidx[0]] ins = node.inputs[vidx[0]]
if ins is not None: if ins is not None:
# This is needed for destroy_map in case it # This is needed for destroy_map in case it
...@@ -889,7 +892,8 @@ class ProfileStats(object): ...@@ -889,7 +892,8 @@ class ProfileStats(object):
for ins in node.inputs: for ins in node.inputs:
assert not (ins in view_of and assert not (ins in view_of and
viewed_by[ins]) viewed_by[ins])
# We track of the original var, so this shouldn't happen # We track of the original var, so this shouldn't
# happen
if (dependencies[ins] and if (dependencies[ins] and
ins not in fgraph.outputs and ins not in fgraph.outputs and
ins.owner and ins.owner and
...@@ -913,7 +917,12 @@ class ProfileStats(object): ...@@ -913,7 +917,12 @@ class ProfileStats(object):
done_set.add(node) done_set.add(node)
frozen_set = frozenset(done_set) frozen_set = frozenset(done_set)
if done_dict.get(frozen_set, max_mem_count+1) > max_mem_count: if done_dict.get(frozen_set, max_mem_count + 1) > max_mem_count:
# check if frozen_set is in done_set
# no, add it to done_set
# yes, then compare the past mem and current mem
# bigger, update the value and continue
# smaller, stop this iteration, move to next node
done_dict[frozen_set] = max_mem_count done_dict[frozen_set] = max_mem_count
for var in node.outputs: for var in node.outputs:
...@@ -928,7 +937,8 @@ class ProfileStats(object): ...@@ -928,7 +937,8 @@ class ProfileStats(object):
if max_mem_count < mem_bound: if max_mem_count < mem_bound:
mem_bound = max_mem_count mem_bound = max_mem_count
else: else:
min_memory_generator(new_exec_nodes, viewed_by, view_of) min_memory_generator(
new_exec_nodes, viewed_by, view_of)
# Reset track variables # Reset track variables
done_set.remove(node) done_set.remove(node)
...@@ -949,7 +959,6 @@ class ProfileStats(object): ...@@ -949,7 +959,6 @@ class ProfileStats(object):
for k in viewof_change: for k in viewof_change:
del view_of[k] del view_of[k]
# two data structure used to mimic Python gc # two data structure used to mimic Python gc
viewed_by = {} # {var1: [vars that view var1]} viewed_by = {} # {var1: [vars that view var1]}
# The len of the list is the value of python ref count. But we use a list, not just the ref count value. # The len of the list is the value of python ref count. But we use a list, not just the ref count value.
...@@ -957,7 +966,8 @@ class ProfileStats(object): ...@@ -957,7 +966,8 @@ class ProfileStats(object):
for var in fgraph.variables: for var in fgraph.variables:
viewed_by[var] = [] viewed_by[var] = []
view_of = {} # {var1: original var viewed by var1} view_of = {} # {var1: original var viewed by var1}
# The orignal mean that we don't keep trac of all the intermediate relationship in the view. # The orignal mean that we don't keep trac of all the intermediate
# relationship in the view.
min_memory_generator(executable_nodes, viewed_by, view_of) min_memory_generator(executable_nodes, viewed_by, view_of)
...@@ -1039,7 +1049,8 @@ class ProfileStats(object): ...@@ -1039,7 +1049,8 @@ class ProfileStats(object):
round(new_max_node_memory_saved_by_view / 1024.)), int( round(new_max_node_memory_saved_by_view / 1024.)), int(
round(max_node_memory_saved_by_view / 1024.))) round(max_node_memory_saved_by_view / 1024.)))
print >> file, " Memory saved if inplace ops are used: %dKB (%dKB)" % \ print >> file, " Memory saved if inplace ops are used: %dKB (%dKB)" % \
(int(round(new_max_node_memory_saved_by_inplace / 1024.)), int(round(max_node_memory_saved_by_inplace / 1024.))) (int(round(new_max_node_memory_saved_by_inplace / 1024.)),
int(round(max_node_memory_saved_by_inplace / 1024.)))
print >> file, " Memory saved if gc is enabled: %dKB (%dKB)" % (int( print >> file, " Memory saved if gc is enabled: %dKB (%dKB)" % (int(
round(new_max_node_memory_size - new_max_running_max_memory_size) / 1024.), int( round(new_max_node_memory_size - new_max_running_max_memory_size) / 1024.), int(
round(max_node_memory_size - max_running_max_memory_size) / 1024.)) round(max_node_memory_size - max_running_max_memory_size) / 1024.))
...@@ -1112,7 +1123,6 @@ class ProfileStats(object): ...@@ -1112,7 +1123,6 @@ class ProfileStats(object):
" emitted in those cases.") " emitted in those cases.")
print >> file, '' print >> file, ''
def summary(self, file=sys.stderr, n_ops_to_print=20, def summary(self, file=sys.stderr, n_ops_to_print=20,
n_apply_to_print=20): n_apply_to_print=20):
self.summary_function(file) self.summary_function(file)
...@@ -1133,8 +1143,6 @@ class ProfileStats(object): ...@@ -1133,8 +1143,6 @@ class ProfileStats(object):
self.optimizer_profile[1]) self.optimizer_profile[1])
if 0: # old code still to be ported from ProfileMode if 0: # old code still to be ported from ProfileMode
def long_print(self, file=sys.stderr, fct_name=None, message=None, def long_print(self, file=sys.stderr, fct_name=None, message=None,
n_apply_to_print=15, n_ops_to_print=20, print_apply=False): n_apply_to_print=15, n_ops_to_print=20, print_apply=False):
...@@ -1159,7 +1167,7 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1159,7 +1167,7 @@ if 0: # old code still to be ported from ProfileMode
sop_time = {} sop_time = {}
sop_call = {} sop_call = {}
sop_op = {} sop_op = {}
#map each op class to Bool. True iff all applies were done in c. # map each op class to Bool. True iff all applies were done in c.
sop_c = {} sop_c = {}
for a, t in op_time.items(): for a, t in op_time.items():
typ = type(a) typ = type(a)
...@@ -1186,7 +1194,7 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1186,7 +1194,7 @@ if 0: # old code still to be ported from ProfileMode
msg = '*' msg = '*'
else: else:
msg = ' ' msg = ' '
print ' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %5d %2d %s' % (f, ftot, t, tot, t/nb_call, msg, nb_call, nb_op, a) print ' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %5d %2d %s' % (f, ftot, t, tot, t / nb_call, msg, nb_call, nb_op, a)
print ' ... (remaining %i Ops account for %.2f%%(%.2fs) of the runtime)'\ print ' ... (remaining %i Ops account for %.2f%%(%.2fs) of the runtime)'\
% (max(0, len(sotimes) - n_ops_to_print), % (max(0, len(sotimes) - n_ops_to_print),
sum(f for f, t, a, ci, nb_call, nb_op in sum(f for f, t, a, ci, nb_call, nb_op in
...@@ -1202,13 +1210,13 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1202,13 +1210,13 @@ if 0: # old code still to be ported from ProfileMode
print 'Theano fct summary: <% total fct time> <total time> <time per call> <nb call> <fct name>' print 'Theano fct summary: <% total fct time> <total time> <time per call> <nb call> <fct name>'
for key in fct_call.keys(): for key in fct_call.keys():
if fct_call[key] > 0: if fct_call[key] > 0:
print ' %4.1f%% %.3fs %.2es %d %s'%( print ' %4.1f%% %.3fs %.2es %d %s' % (
fct_call_time[key] / total_fct_time * 100, fct_call_time[key] / total_fct_time * 100,
fct_call_time[key], fct_call_time[key],
fct_call_time[key] / fct_call[key], fct_call_time[key] / fct_call[key],
fct_call[key], key.name) fct_call[key], key.name)
else: else:
print ' NOT CALLED',key.name print ' NOT CALLED', key.name
if total_fct_time > 0: if total_fct_time > 0:
time_pr_in_fct = local_time / total_fct_time * 100 time_pr_in_fct = local_time / total_fct_time * 100
...@@ -1227,8 +1235,8 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1227,8 +1235,8 @@ if 0: # old code still to be ported from ProfileMode
print (' Theano Op time (included in fct call, Time spent ' print (' Theano Op time (included in fct call, Time spent '
'running thunks) %.3fs %.1f%%(of total) %.1f%%(of fct call)' % 'running thunks) %.3fs %.1f%%(of total) %.1f%%(of fct call)' %
(local_time, local_time / total_time * 100, time_pr_in_fct)) (local_time, local_time / total_time * 100, time_pr_in_fct))
print 'Other time since import %.3fs %.1f%%'%(other_time,other_time/total_time*100) print 'Other time since import %.3fs %.1f%%' % (other_time, other_time / total_time * 100)
print '%i Theano fct call, %.3fs per call'%(total_fct_call, time_per_call) print '%i Theano fct call, %.3fs per call' % (total_fct_call, time_per_call)
print print
print "List of apply that don't have float64 as input but have float64 in outputs. Usefull to know if we forgot some cast when using floatX=float32 or gpu code." print "List of apply that don't have float64 as input but have float64 in outputs. Usefull to know if we forgot some cast when using floatX=float32 or gpu code."
...@@ -1236,7 +1244,7 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1236,7 +1244,7 @@ if 0: # old code still to be ported from ProfileMode
for fct in fct_call.keys(): for fct in fct_call.keys():
for idx, node in enumerate(fct.maker.fgraph.toposort()): for idx, node in enumerate(fct.maker.fgraph.toposort()):
if any(hasattr(i, 'dtype') and i.dtype == 'float64' for i in node.outputs) and not any(hasattr(i, 'dtype') and i.dtype == 'float64' for i in node.inputs): if any(hasattr(i, 'dtype') and i.dtype == 'float64' for i in node.outputs) and not any(hasattr(i, 'dtype') and i.dtype == 'float64' for i in node.inputs):
print str(node), idx, fct.name, str([getattr(i,'dtype',None) for i in node.inputs]),str([getattr(i,'dtype',None) for i in node.outputs]) print str(node), idx, fct.name, str([getattr(i, 'dtype', None) for i in node.inputs]), str([getattr(i, 'dtype', None) for i in node.outputs])
if any([x[2].__name__.startswith("Gpu") for x in sotimes]): if any([x[2].__name__.startswith("Gpu") for x in sotimes]):
cpu = [] cpu = []
...@@ -1254,8 +1262,8 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1254,8 +1262,8 @@ if 0: # old code still to be ported from ProfileMode
sum_trans = sum(so[1] for so in trans) sum_trans = sum(so[1] for so in trans)
print print
print "Spent %.3fs(%.3f%%) in cpu Op, %.3fs(%.3f%%) in gpu Op and %.3fs(%.3f%%) transfert Op"%( print "Spent %.3fs(%.3f%%) in cpu Op, %.3fs(%.3f%%) in gpu Op and %.3fs(%.3f%%) transfert Op" % (
sum_cpu, sum_cpu/local_time*100, sum_gpu, sum_gpu/local_time*100, sum_trans, sum_trans/local_time*100) sum_cpu, sum_cpu / local_time * 100, sum_gpu, sum_gpu / local_time * 100, sum_trans, sum_trans / local_time * 100)
print "Theano function input that are float64" print "Theano function input that are float64"
print "<fct name> <input name> <input type> <str input>" print "<fct name> <input name> <input type> <str input>"
...@@ -1270,20 +1278,25 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1270,20 +1278,25 @@ if 0: # old code still to be ported from ProfileMode
from theano.tensor.raw_random import RandomFunction from theano.tensor.raw_random import RandomFunction
import theano import theano
import theano.scalar as scal import theano.scalar as scal
scalar_op_amdlibm_no_speed_up = [scal.LT, scal.GT, scal.LE, scal.GE, scal.EQ, scal.NEQ, scal.InRange, scal.Switch, scal.OR, scal.XOR, scal.AND, scal.Invert, scal.Maximum, scal.Minimum, scal.Add, scal.Mul, scal.Sub, scal.TrueDiv, scal.IntDiv, scal.Clip, scal.First, scal.Second, scal.Identity, scal.Cast, scal.Sgn, scal.Neg, scal.Inv, scal.Sqr ] scalar_op_amdlibm_no_speed_up = [scal.LT, scal.GT, scal.LE, scal.GE, scal.EQ, scal.NEQ, scal.InRange, scal.Switch, scal.OR, scal.XOR, scal.AND, scal.Invert, scal.Maximum,
scalar_op_amdlibm_speed_up = [scal.Mod, scal.Pow, scal.Ceil, scal.Floor, scal.RoundHalfToEven, scal.RoundHalfAwayFromZero, scal.Log, scal.Log2, scal.Log10, scal.Log1p, scal.Exp, scal.Sqrt, scal.Abs, scal.Cos, scal.Sin, scal.Tan, scal.Tanh, scal.Cosh, scal.Sinh, T.nnet.sigm.ScalarSigmoid, T.nnet.sigm.ScalarSoftplus ]#Abs, Mod in float{32,64} only scal.Minimum, scal.Add, scal.Mul, scal.Sub, scal.TrueDiv, scal.IntDiv, scal.Clip, scal.First, scal.Second, scal.Identity, scal.Cast, scal.Sgn, scal.Neg, scal.Inv, scal.Sqr]
scalar_op_amdlibm_speed_up = [scal.Mod, scal.Pow, scal.Ceil, scal.Floor, scal.RoundHalfToEven, scal.RoundHalfAwayFromZero, scal.Log, scal.Log2, scal.Log10, scal.Log1p, scal.Exp,
scal.Sqrt, scal.Abs, scal.Cos, scal.Sin, scal.Tan, scal.Tanh, scal.Cosh, scal.Sinh, T.nnet.sigm.ScalarSigmoid, T.nnet.sigm.ScalarSoftplus] # Abs, Mod in float{32,64} only
def get_scalar_ops(s): def get_scalar_ops(s):
if isinstance(s, theano.scalar.Composite): if isinstance(s, theano.scalar.Composite):
l = [] l = []
for node in s.fgraph.toposort(): for node in s.fgraph.toposort():
l+=get_scalar_ops(node.op) l += get_scalar_ops(node.op)
return l return l
else: return [s] else:
return [s]
def list_scalar_op(op): def list_scalar_op(op):
if isinstance(op.scalar_op, theano.scalar.Composite): if isinstance(op.scalar_op, theano.scalar.Composite):
return get_scalar_ops(op.scalar_op) return get_scalar_ops(op.scalar_op)
else: return [op.scalar_op] else:
return [op.scalar_op]
def amdlibm_speed_up(op): def amdlibm_speed_up(op):
if not isinstance(op, T.Elemwise): if not isinstance(op, T.Elemwise):
...@@ -1296,6 +1309,7 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1296,6 +1309,7 @@ if 0: # old code still to be ported from ProfileMode
elif s_op.__class__ not in scalar_op_amdlibm_no_speed_up: elif s_op.__class__ not in scalar_op_amdlibm_no_speed_up:
print "We don't know if amdlibm will accelerate this scalar op.", s_op print "We don't know if amdlibm will accelerate this scalar op.", s_op
return False return False
def exp_float32_op(op): def exp_float32_op(op):
if not isinstance(op, T.Elemwise): if not isinstance(op, T.Elemwise):
return False return False
...@@ -1303,19 +1317,19 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1303,19 +1317,19 @@ if 0: # old code still to be ported from ProfileMode
l = list_scalar_op(op) l = list_scalar_op(op)
return any([s_op.__class__ in [scal.Exp] for s_op in l]) return any([s_op.__class__ in [scal.Exp] for s_op in l])
#tip 1 # tip 1
if config.floatX=='float64': if config.floatX == 'float64':
print " - Try the Theano flag floatX=float32" print " - Try the Theano flag floatX=float32"
#tip 2 # tip 2
if not config.lib.amdlibm and any([amdlibm_speed_up(a.op) for i,a in apply_time]): if not config.lib.amdlibm and any([amdlibm_speed_up(a.op) for i, a in apply_time]):
print " - Try installing amdlibm and set the Theano flag lib.amdlibm=True. This speed up only some Elemwise operation." print " - Try installing amdlibm and set the Theano flag lib.amdlibm=True. This speed up only some Elemwise operation."
#tip 3 # tip 3
if not config.lib.amdlibm and any([exp_float32_op(a.op) and a.inputs[0].dtype=='float32' for i,a in apply_time]): if not config.lib.amdlibm and any([exp_float32_op(a.op) and a.inputs[0].dtype == 'float32' for i, a in apply_time]):
print " - With the default gcc libm, exp in float32 is slower than in float64! Try Theano flags floatX=float64 or install amdlibm and set the theano flags lib.amdlibm=True" print " - With the default gcc libm, exp in float32 is slower than in float64! Try Theano flags floatX=float64 or install amdlibm and set the theano flags lib.amdlibm=True"
#tip 4 # tip 4
for a, t in apply_time.iteritems(): for a, t in apply_time.iteritems():
node = a node = a
if (isinstance(node.op, T.Dot) and if (isinstance(node.op, T.Dot) and
...@@ -1326,7 +1340,7 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1326,7 +1340,7 @@ if 0: # old code still to be ported from ProfileMode
"Currently they are: %s" % "Currently they are: %s" %
[i.type for i in node.inputs]) [i.type for i in node.inputs])
#tip 5 # tip 5
for a, t in apply_time.iteritems(): for a, t in apply_time.iteritems():
node = a node = a
if isinstance(node.op, RandomFunction): if isinstance(node.op, RandomFunction):
...@@ -1414,7 +1428,7 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1414,7 +1428,7 @@ if 0: # old code still to be ported from ProfileMode
tb = b_time.pop(a, 0) tb = b_time.pop(a, 0)
r[a] += ta - tb r[a] += ta - tb
#they are missing in a # they are missing in a
for a, t in b_time.items(): for a, t in b_time.items():
r.setdefault(a, 0) r.setdefault(a, 0)
r[a] += t r[a] += t
...@@ -1435,8 +1449,6 @@ if 0: # old code still to be ported from ProfileMode ...@@ -1435,8 +1449,6 @@ if 0: # old code still to be ported from ProfileMode
n_ops_to_print=n_ops_to_print, print_apply=False) n_ops_to_print=n_ops_to_print, print_apply=False)
class ScanProfileStats(ProfileStats): class ScanProfileStats(ProfileStats):
callcount = 0.0 callcount = 0.0
nbsteps = 0.0 nbsteps = 0.0
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论