Commit 061bb990 authored by Frederic

The old ProfileMode memory profile is now removed.

We print a message telling the user to use the new profiler in that case. This repairs ProfileMode.
Parent 3005d9d3
......@@ -50,7 +50,6 @@ class Profile_Maker(FunctionMaker):
#initialize the timers
for i, node in enumerate(ret.maker.fgraph.toposort()):
profile.apply_time[node] = 0.0
profile.outputs_size[node] = [0.0] * len(node.outputs)
# a thunk_group is a list of the thunks from each linker
# corresponding to the i'th position in the toposort.
......@@ -70,13 +69,13 @@ class Profile_Maker(FunctionMaker):
def new_fn():
self.mode.apply_time = self.mode.profile_stats[ret].apply_time
self.mode.outputs_size = self.mode.profile_stats[ret].outputs_size
self.mode.variable_shape = self.mode.profile_stats[ret].variable_shape
ret_fn()
# delete the old apply_time variable
# because it doesn't mean the same thing anymore.
# This prevents old code from looking like it still works.
del self.mode.apply_time
del self.mode.outputs_size
del self.mode.variable_shape
ret.fn = new_fn
......@@ -165,34 +164,10 @@ class ProfileMode(Mode):
t0 = time.time()
th()
dt = time.time() - t0
size = []
for o in th.outputs:
if not hasattr(o[0], 'size'):
#if the output type don't have a size attribute, set -1
#to signify we can't evaluate it.
#This happen at least for mtrand.RandomState type(in numpy)
size.append(-1)
continue
s = o[0].size
#can't use o[0].dtype.itemsize as dtype is a str for
#CudaNdarray
dtype = str(o[0].dtype)
dtype2 = dtype[-2:]
if dtype2 == '32':
s *= 4
elif dtype2 == '64':
s *= 8
elif dtype2 == '16':
s *= 2
elif dtype[-1] == '8':
s *= 1
elif dtype[-3:] == '128':
s *= 16
else:
raise Exception("Can't determine the memory size of dtype",
o[0].dtype)
size.append(s)
self.outputs_size[node] = size
for var, data in zip(node.outputs, th.outputs):
sh = getattr(data[0], 'shape', 'input no shape')
self.variable_shape[var] = sh
self.apply_time[node] += max(dt, 1e-14)
self.provided_linker = linker
......@@ -247,15 +222,14 @@ class ProfileMode(Mode):
print i, n
apply_cimpl = {}
outputs_size = {}
for fn, ps in self.profile_stats.items():
apply_cimpl.update(ps.apply_cimpl)
message = self.message
outputs_size = {}
variable_shape = {}
for fn, ps in self.profile_stats.items():
outputs_size.update(ps.outputs_size)
variable_shape.update(ps.variable_shape)
other_time = dict(
linker_time=sum(
......@@ -265,7 +239,7 @@ class ProfileMode(Mode):
self.print_summary_("print_summary",
compile_time, fct_call_time, fct_call,
apply_time, apply_cimpl, message, outputs_size,
apply_time, apply_cimpl, message, variable_shape,
self.local_time, other_time,
**kwargs)
......@@ -300,19 +274,19 @@ class ProfileMode(Mode):
apply_time = diff_dict(self.apply_time, other.apply_time)
apply_cimpl = self.apply_cimpl and other.apply_cimpl
message = self.message
outputs_size = diff_dict(self.outputs_size, other.outputs_size)
variable_shape = diff_dict(self.variable_shape, other.variable_shape)
other_time = {'linker_time': self.linker_time - other.linker_time,
'optimizer_time': self.optimizer_time -
other.optimizer_time}
self.print_summary_("print_diff_summary", compile_time,
fct_call_time, fct_call,
apply_time, apply_cimpl, message, outputs_size,
apply_time, apply_cimpl, message, variable_shape,
print_apply=False, other_time=other_time,
**kwargs)
@staticmethod
def print_summary_(fct_name, compile_time, fct_call_time, fct_call,
apply_time, apply_cimpl, message, outputs_size,
apply_time, apply_cimpl, message, variable_shape,
local_time, other_time,
n_apply_to_print=config.ProfileMode.n_apply_to_print,
n_ops_to_print=config.ProfileMode.n_ops_to_print,
......@@ -502,96 +476,17 @@ class ProfileMode(Mode):
print '(*) Op is running a c implementation'
for printer in profiler_printers:
printer(fct_name, compile_time, fct_call_time, fct_call,
apply_time, apply_cimpl, message, outputs_size,
apply_time, apply_cimpl, message, variable_shape,
other_time)
if not outputs_size:
if not variable_shape:
print """\nProfile of Theano intermediate memory disabled.
To enabled, put the Theano flag ProfileMode.profile_memory to True."""
else:
fct_memory={}#fgraph->dict(node->(outputs size))
var_mem = {}
for node, val in outputs_size.items():
fct_memory.setdefault(node.fgraph, {})
fct_memory[node.fgraph][node]=val
for out,v in zip(node.outputs,val):
var_mem[out]=v
print
print "Profile of Theano functions memory:"
print "(This check only the output of each apply node. It don't check the temporary memory used by the op in the apply node.)"
nb_skipped = 0
for fgraph, nodes_mem in fct_memory.iteritems():
size_sum=sum([sum(val) for key,val in nodes_mem.iteritems()])
if size_sum < min_memory_size:
nb_skipped += 1
continue
print "Theano fct:", [fct for fct in fct_call.keys() if fct.maker.fgraph is fgraph][0].name
print " Max without gc, inplace and view (KB)",size_sum/1024
node_memory_size = 0
node_memory_saved_by_view = 0
node_memory_saved_by_inplace = 0
running_memory_size = 0
running_max_memory_size = 0
post_thunk_old_storage = []
items = nodes_mem.items()
items.sort(key=lambda a: a[1])
items.reverse()
order = self.linker.schedule(fgraph)
computed, last_user = gof.link.gc_helper(order)
for node in order:
post_thunk_old_storage.append([ input_idx
for input_idx,input in enumerate(node.inputs)
if (input in computed) and (input not in fgraph.outputs) and node == last_user[input]])
for node,val in items[:n_apply_to_print]:
dmap = getattr(node.op,'destroy_map',None)
vmap = getattr(node.op,'view_map',None)
for idx,v in enumerate(val):
if dmap and idx in dmap:#TODO check the op returned a view
node_memory_saved_by_inplace += v
elif vmap and idx in vmap:#TODO check the op returned a view
node_memory_saved_by_view += v
else:
node_memory_size += v
running_memory_size += v
if running_memory_size > running_max_memory_size:
running_max_memory_size = running_memory_size
old_storage = post_thunk_old_storage[order.index(node)]
for old_s in old_storage:
running_memory_size -= var_mem[node.inputs[old_s]]
pass
pass
print " Max FAST_RUN_NO_GC (KB)", node_memory_size/1024
print " Max FAST_RUN (KB)", running_max_memory_size/1024
print " Memory saved by view (KB)", node_memory_saved_by_view/1024
print " Memory saved by inplace (KB)", node_memory_saved_by_inplace/1024
print " Memory saved by GC (KB)", (node_memory_size-running_max_memory_size)/1024
n_apply_to_print+=10#TODO remove this line
print " <Sum apply outputs (bytes)> <Apply outputs memory size(bytes)> <created/inplace/view> <Apply node>"
print " <created/inplace/view> is taked from the op declaration, not the op exeuction. Use DebugMode to have warning about inplace/view declaration being respected."
n_apply_printed = 0
for key,val in items[:n_apply_to_print]:
if sum(val) < min_memory_size:
break
code = ['c']*len(node.outputs)
for out,inp in getattr(key.op,'destroy_map',{}).iteritems():
code[out] = "i"
for out,inp in getattr(key.op,'view_map',{}).iteritems():
code[out] = "v"
print ' %9dB %s %s %s' % (sum(val), str(val), ' '.join(code), key)
n_apply_printed += 1
print ' ... (remaining %i Apply account for %.2f%%(%d bytes) of the total intermediate memory used)'\
%(max(0, len(nodes_mem)-n_apply_printed),
sum(sum(val) for key, val in items[n_apply_printed:])/float(size_sum),
sum(sum(val) for key, val in items[n_apply_printed:]))
if nb_skipped > 0:
print ' We skipped %d theano function(s). Each of them used less then %dB(theano flags ProfileMode.min_memory_size) of total intermediate memory size'%(nb_skipped, min_memory_size)
print """
The memory profile in ProfileMode is removed!
Use the new profiler. Use the Theano flags
profile=True,profile_memory=True to enable it."""
print
print """Here are tips to potentially make your code run faster
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论