Remove ProfileMode

28785b40 · Frederic Bastien · c193af5c · 28785b40 · 28785b40 · 28785b40
--- a/Theano.pyproj
+++ b/Theano.pyproj
@@ -34,7 +34,6 @@
    <Compile Include="theano\compile\mode.py" />
    <Compile Include="theano\compile\module.py" />
    <Compile Include="theano\compile\pfunc.py" />
-    <Compile Include="theano\compile\profilemode.py" />
    <Compile Include="theano\compile\profiling.py" />
    <Compile Include="theano\compile\sandbox\__init__.py" />
    <Compile Include="theano\compile\sharedvalue.py" />

--- a/benchmark/autoencoder/aa.py
+++ b/benchmark/autoencoder/aa.py
@@ -8,7 +8,7 @@ import theano
 import theano.tensor as T
 import theano.sandbox
 from six.moves import xrange
-from theano.compile import module, Mode, ProfileMode
+from theano.compile import module, Mode
 from theano import gof, Op, Apply

 from theano.tensor import blas, opt
@@ -191,7 +191,6 @@ class M(module.Module):

 mod = M()
 mode = 'FAST_RUN'
-#mode = ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
 mode = Mode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker(nice_errors=True))
 mode = Mode(optimizer='fast_run', linker='c')
 mode = Mode(optimizer='fast_run', linker='c|py')

--- a/benchmark/regression/regression.py
+++ b/benchmark/regression/regression.py
@@ -91,7 +91,6 @@ class PrintEverythingMode(theano.Mode):

 def test_module_advanced_example():

-    profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
    profmode = PrintEverythingMode(theano.gof.OpWiseCLinker(), 'fast_run')

    data_x = N.random.randn(4, 10)

--- a/doc/library/compile/index.txt
+++ b/doc/library/compile/index.txt
@@ -19,7 +19,6 @@
    ops
    mode
    debugmode
-    profilemode
    nanguardmode



--- a/doc/library/compile/mode.txt
+++ b/doc/library/compile/mode.txt
@@ -21,10 +21,8 @@ Theano defines the following modes by name:
 - ``'FAST_COMPILE'``: Apply just a few graph optimizations and only use Python implementations.
 - ``'FAST_RUN'``: Apply all optimizations, and use C implementations where possible.
 - ``'DebugMode'``: A mode for debugging. See :ref:`DebugMode <debugmode>` for details.
- ``'ProfileMode'``: Deprecated, use the Theano flag :attr:`config.profile`.
 - ``'NanGuardMode``: :ref:`Nan detector <nanguardmode>`
 - ``'DEBUG_MODE'``: Deprecated. Use the string DebugMode.
- ``'PROFILE_MODE'``: Deprecated, use the Theano flag :attr:`config.profile`.

 The default mode is typically ``FAST_RUN``, but it can be controlled via the
 configuration variable :attr:`config.mode`, which can be

--- a/doc/library/compile/profilemode.txt
+++ b/doc/library/compile/profilemode.txt
@@ -16,203 +16,4 @@ Guide

 .. note::

-    ProfileMode is deprecated. Use :attr:`config.profile` instead.
-
-To profile a Theano graph, a special mode called ProfileMode, must be passed as
-an argument when compiling your graph. Using ProfileMode is a three-step
-process.
-
-Creating a ProfileMode Instance
-------------------------------
-
-First create a ProfileMode instance.
-
->>> import theano
->>> from theano import ProfileMode
->>> profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
-
-The ProfileMode constructor takes as input an optimizer and a
-linker. Which optimizer and linker to use will depend on the
-application. For example, a user wanting to profile the Python
-implementation only, should use the gof.PerformLinker (or "py" for
-short). On the other hand, a user wanting to profile his graph using C
-implementations wherever possible should use the ``gof.OpWiseCLinker``
-(or "c|py").
-
-In the same manner, modifying which optimizer is passed to ProfileMode
-will decide which optimizations are applied to the graph, prior to
-profiling. Changing the optimizer should be especially useful when
-developing new graph optimizations, in order to evaluate their impact
-on performance. Also keep in mind that optimizations might change the
-computation graph a lot, meaning that you might not recognize some of
-the operations that are profiled (you did not use them explicitly but
-an optimizer decided to use it to improve performance or numerical
-stability). If you cannot easily relate the output of ProfileMode with
-the computations you defined, you might want to try setting optimizer
-to None (but keep in mind the computations will be slower than if they
-were optimized).
-
-Note that most users will want to use ProfileMode to optimize their
-graph and find where most of the computation time is being spent. In
-this context, 'fast_run' optimizer and ``gof.OpWiseCLinker`` are the
-most appropriate choices.
-
-Compiling your Graph with ProfileMode
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Once the ProfileMode instance is created, simply compile your graph as you
-would normally, by specifying the mode parameter.
-
-.. testsetup::
-
-   import theano
-   input1, input2 = theano.tensor.scalars(2)
-   output1 = input1+input2
-
->>> # with functions
->>> f = theano.function([input1,input2],[output1], mode=profmode)
-
-Retrieving Timing Information
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Once your graph is compiled, simply run the program or operation you wish to
-profile, then call ``profmode.print_summary()``. This will provide you with
-the desired timing information, indicating where your graph is spending most
-of its time.
-
-This is best shown through an example.
-Lets use the example of logistic
-regression.  (Code for this example is in the file
-``benchmark/regression/regression.py``.)
-
-Compiling the module with ProfileMode and calling ``profmode.print_summary()``
-generates the following output:
-
-.. code-block:: python
-
-    """
-    ProfileMode.print_summary()
-    ---------------------------
-
-    local_time 0.0749197006226 (Time spent running thunks)
-    Apply-wise summary: <fraction of local_time spent at this position> (<Apply position>, <Apply Op name>)
-            0.069   15      _dot22
-            0.064   1       _dot22
-            0.053   0       InplaceDimShuffle{x,0}
-            0.049   2       InplaceDimShuffle{1,0}
-            0.049   10      mul
-            0.049   6       Elemwise{ScalarSigmoid{output_types_preference=<theano.scalar.basic.transfer_type object at 0x171e650>}}[(0, 0)]
-            0.049   3       InplaceDimShuffle{x}
-            0.049   4       InplaceDimShuffle{x,x}
-            0.048   14      Sum{0}
-            0.047   7       sub
-            0.046   17      mul
-            0.045   9       sqr
-            0.045   8       Elemwise{sub}
-            0.045   16      Sum
-            0.044   18      mul
-       ... (remaining 6 Apply instances account for 0.25 of the runtime)
-    Op-wise summary: <fraction of local_time spent on this kind of Op> <Op name>
-            0.139   * mul
-            0.134   * _dot22
-            0.092   * sub
-            0.085   * Elemwise{Sub{output_types_preference=<theano.scalar.basic.transfer_type object at 0x1779f10>}}[(0, 0)]
-            0.053   * InplaceDimShuffle{x,0}
-            0.049   * InplaceDimShuffle{1,0}
-            0.049   * Elemwise{ScalarSigmoid{output_types_preference=<theano.scalar.basic.transfer_type object at 0x171e650>}}[(0, 0)]
-            0.049   * InplaceDimShuffle{x}
-            0.049   * InplaceDimShuffle{x,x}
-            0.048   * Sum{0}
-            0.045   * sqr
-            0.045   * Sum
-            0.043   * Sum{1}
-            0.042   * Elemwise{Mul{output_types_preference=<theano.scalar.basic.transfer_type object at 0x17a0f50>}}[(0, 1)]
-            0.041   * Elemwise{Add{output_types_preference=<theano.scalar.basic.transfer_type object at 0x1736a50>}}[(0, 0)]
-            0.039   * Elemwise{Second{output_types_preference=<theano.scalar.basic.transfer_type object at 0x1736d90>}}[(0, 1)]
-       ... (remaining 0 Ops account for 0.00 of the runtime)
-    (*) Op is running a c implementation
-
-    """
-
-
-
-.. note::
-
-    ***TODO***
-
-    The following text was recovered from a recent version of the source
-    file... hopefully things haven't gotten too out-of-sync!
-
-    The first show an Apply-wise summary, the second show an Op-wise summary, the third show an type-Op-wise summary.
-
-    The Apply-wise summary print the timing information for the worst
-    offending Apply nodes. This corresponds to individual Op applications
-    within your graph which take the longest to execute (so if you use dot
-    twice, you will see two entries there).
-
-    The Op-wise summary print the execution time of all Apply nodes
-    executing the same Op are grouped together and the total execution
-    time per Op is shown (so if you use dot twice, you will see only one
-    entry there corresponding to the sum of the time spent in each of
-    them). If two Op have different hash value, they will be separate.
-
-    The type-Op-wise summary group the result by type of op. So event if
-    two Op have different hash value, they will be merged.
-
-    Their is an hack with the Op-wise summary. Go see it if you want to know more.
-
-
-
-The summary has two components to it. In the first section called the
-Apply-wise summary, timing information is provided for the worst
-offending Apply nodes. This corresponds to individual Op applications
-within your graph which take the longest to execute (so if you use
-``dot`` twice, you will see two entries there). In the second portion,
-the Op-wise summary, the execution time of all Apply nodes executing
-the same Op are grouped together and the total execution time per Op
-is shown (so if you use ``dot`` twice, you will see only one entry
-there corresponding to the sum of the time spent in each of them).
-
-Note that the ProfileMode also shows which Ops were running a c
-implementation.
-
-Developers wishing to optimize the performance of their graph should
-focus on the worst offending Ops and Apply nodes -- either by optimizing an
-implementation, providing a missing C implementation, or by writing a graph
-optimization that eliminates the offending Op altogether.
-You should strongly consider emailing one of our lists about your issue before
-spending too much time on this.
-
-
-Reference
-=========
-
-.. class:: ProfileMode(Mode)
-
-    .. method:: print_summary(n_apply_to_print=None, n_ops_to_print=None)
-
-        Print three summaries to stdout that show where cpu time is spent during theano function executions (for all functions using this object instance).
-
-        :param n_apply_to_print: the number of apply nodes to print.
-           The default 15, but can be configured via ``ProfileMode.n_ops_to_print`` in :envvar:`THEANO_FLAGS`.
-
-        :param n_ops_to_print: the number of ops to print.
-           Default 20, or but can be configured via ``ProfileMode.n_apply_to_print`` in :envvar:`THEANO_FLAGS`.
-
-        :returns: None
-
-    .. method:: print_diff_summary(self, other, n_apply_to_print=None, n_ops_to_print=None):
-        """ As print_summary, but print the difference on two different profile mode.
-        TODO: Also we don't print the Apply-wise summary as it don't work for now.
-        TODO: make comparaison with gpu code.
-
-        :param other: the other instance of ProfileMode that we want to be compared to.
-
-        :param n_apply_to_print: the number of apply nodes to print.
-           The default 15, but can be configured via ``ProfileMode.n_ops_to_print`` in :envvar:`THEANO_FLAGS`.
-
-        :param n_ops_to_print: the number of ops to print.
-           Default 20, or but can be configured via ``ProfileMode.n_apply_to_print`` in :envvar:`THEANO_FLAGS`.
-
-        :returns: None
-
+    ProfileMode has been removed. Use :attr:`config.profile` instead.
--- a/doc/library/config.txt
+++ b/doc/library/config.txt
@@ -315,7 +315,7 @@ import theano and print the config variable, as in:

 .. attribute:: mode

-    String value: ``'Mode'``, ``'ProfileMode'`` (deprecated), ``'DebugMode'``, ``'FAST_RUN'``,
+    String value: ``'Mode'``, ``'DebugMode'``, ``'FAST_RUN'``,
    ``'FAST_COMPILE'``

    Default: ``'Mode'``

--- a/doc/tutorial/debug_faq.txt
+++ b/doc/tutorial/debug_faq.txt
@@ -284,7 +284,7 @@ First, make sure you're running in ``FAST_RUN`` mode. Even though
 to ``theano.function`` (or ``theano.make``) or by setting :attr:`config.mode`
 to ``FAST_RUN``.

-Second, try the Theano :ref:`using_profilemode`.  This will tell you which
+Second, try Theano's :ref:`profiling <tut_profiling>`.  This will tell you which
 ``Apply`` nodes, and which ops are eating up your CPU cycles.

 Tips:

--- a/doc/tutorial/modes.txt
+++ b/doc/tutorial/modes.txt
@@ -248,13 +248,3 @@ constructor arguments. The keyword version of DebugMode (which you get by using
 is quite strict.

 For more detail, see :ref:`DebugMode<debugmode>` in the library.
-
-.. _using_profilemode:
-
-
-ProfileMode
-===========
-
-.. note::
-
-    ProfileMode is deprecated. Use :attr:`config.profile` instead.
--- a/theano/__init__.py
+++ b/theano/__init__.py
@@ -73,7 +73,7 @@ from theano.compile import (
    Mode,
    predefined_modes, predefined_linkers, predefined_optimizers,
    FunctionMaker, function, function_dump, OpFromGraph,
-    ProfileMode, ProfileStats,
+    ProfileStats,
    Param, shared, as_op)

 from theano.misc.safe_asarray import _asarray

--- a/theano/compile/__init__.py
+++ b/theano/compile/__init__.py
@@ -19,8 +19,6 @@ from theano.compile.monitormode import MonitorMode

 from theano.compile.profiling import ProfileStats, ScanProfileStats

-from theano.compile.profilemode import ProfileMode
-
 from theano.compile.sharedvalue import (shared, shared_constructor,
                                        SharedVariable)
 from theano.compile.pfunc import pfunc, Param, rebuild_collect_shared

--- a/theano/compile/function_module.py
+++ b/theano/compile/function_module.py
@@ -1381,17 +1381,11 @@ class FunctionMaker(object):
                 output_keys=None):
        mode = theano.compile.mode.get_mode(mode)

-        # figure out which profile object to use (if any)
-        # to help with forward-porting ProfileMode,
-        # we allow ProfileMode to provide a ProfileStats object
-        # using this somewhat awkward mechanism.
-        mode_profile = getattr(mode, 'profile', None)
-        if (profile is not None and
-                profile is not False and
-                mode_profile is not None):
+        # Assert old way of working isn't used
+        if getattr(mode, 'profile', None):
            raise TypeError(
-                'profile passed via both "mode" and "profile" arguments')
-        self.profile = profile = profile or mode_profile
+                "profile passed via 'mode'. This isn't supported anymore")
+        self.profile = profile
        if profile:
            # This is very important:
            # 1) We preload the cache here to don't have its timming
@@ -1748,9 +1742,6 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False,

    - FAST_COMPILE (minimal optimization)

-    - ProfileMode(deprecated): allow to print a profile mode with
-      mode.print_summary
-
    - DebugMode: verify many internal conditions that are normally assumed
      (slow)


--- a/theano/compile/monitormode.py
+++ b/theano/compile/monitormode.py
@@ -79,7 +79,7 @@ class MonitorMode(Mode):
        Create a new instance of this Mode.

        Keyword arguments can be provided for the linker, but they will be
-        ignored, because ProfileMode needs to use its own linker.
+        ignored, because MonitorMode needs to use its own linker.

        """
        if optimizer == "":

--- a/theano/compile/profiling.py
+++ b/theano/compile/profiling.py
@@ -3,8 +3,6 @@ ProfileStats object for runtime and memory profiling.

 """
 #
-# TODO: measure memory usage like ProfileMode did
-# TODO: put the optimization tips into a tips section??
 # TODO: add tip to use specify_shape (is specify_shape even in library doc?)
 # TODO: ensure field width for string fields makes columns line up
 # TODO: what to do about 'diff summary'? (ask Fred?)
@@ -378,7 +376,7 @@ class ProfileStats(object):
        else:
            local_time = 0
        if local_time == 0:
-            print(('ProfileMode.summary_class: total time 0'
+            print(('ProfileStats.summary_class: total time 0'
                   ' (did you forget to enable counters?)'), file=file)
            return
        class_time = self.class_time()
@@ -462,7 +460,7 @@ class ProfileStats(object):
        else:
            local_time = 0
        if local_time == 0:
-            print(('ProfileMode.summary_ops: total time 0'
+            print(('ProfileStats.summary_ops: total time 0'
                   ' (did you forget to enable counters?)'), file=file)
            return
        op_time = self.op_time()
@@ -540,7 +538,7 @@ class ProfileStats(object):
        else:
            local_time = 0
        if local_time == 0:
-            print(('ProfileMode.summary_nodes: total time 0'
+            print(('ProfileStats.summary_nodes: total time 0'
                   ' (did you forget to enable counters?)'), file=file)
            return


--- a/theano/compile/tests/test_modes.py
+++ b/theano/compile/tests/test_modes.py
@@ -7,7 +7,7 @@ import unittest

 import theano
 import theano.tensor as T
-from theano.compile import Mode, ProfileMode
+from theano.compile import Mode


 class T_bunch_of_modes(unittest.TestCase):
@@ -18,9 +18,6 @@ class T_bunch_of_modes(unittest.TestCase):
        linker_classes_involved = []

        predef_modes = ['FAST_COMPILE', 'FAST_RUN', 'DEBUG_MODE']
-        # Use a new instance of ProfileMode instead of 'ProfileMode' to
-        # avoid printing a profile mode summary in nose output
-        predef_modes.append(ProfileMode())

        # Linkers to use with regular Mode
        if theano.config.cxx:
@@ -43,20 +40,14 @@ class T_bunch_of_modes(unittest.TestCase):
        # there should be
        # - VM_Linker
        # - OpWiseCLinker (FAST_RUN)
-        # - WrapLinker ("ProfileMode")
+        # - WrapLinker
        # - PerformLinker (FAST_COMPILE)
        # - DebugMode's Linker  (DEBUG_MODE)
        assert 5 == len(set(linker_classes_involved))


-class T_ProfileMode_WrapLinker(unittest.TestCase):
+class T_WrapLinker(unittest.TestCase):
    def test_1(self):
-        # First, compile a function with a new ProfileMode() object
-        # No need to call that function
-        x = T.matrix()
-        mode = ProfileMode()
-        theano.function([x], x * 2, mode=mode)
-
        # Then, build a mode with the same linker, and a modified optimizer
        default_mode = theano.compile.mode.get_default_mode()
        modified_mode = default_mode.including('specialize')

--- a/theano/configdefaults.py
+++ b/theano/configdefaults.py
@@ -405,9 +405,9 @@ AddConfigVar(
 AddConfigVar(
    'mode',
    "Default compilation mode",
-    EnumStr('Mode', 'ProfileMode', 'DebugMode', 'FAST_RUN',
+    EnumStr('Mode', 'DebugMode', 'FAST_RUN',
            'NanGuardMode',
-            'FAST_COMPILE', 'PROFILE_MODE', 'DEBUG_MODE'),
+            'FAST_COMPILE', 'DEBUG_MODE'),
    in_c_key=False)

 param = "g++"
@@ -463,8 +463,7 @@ del param
 if rc == 0 and config.cxx != "":
    # Keep the default linker the same as the one for the mode FAST_RUN
    AddConfigVar('linker',
-                 ("Default linker used if the theano flags mode is Mode "
-                  "or ProfileMode(deprecated)"),
+                 "Default linker used if the theano flags mode is Mode ",
                 EnumStr('cvm', 'c|py', 'py', 'c', 'c|py_nogc',
                         'vm', 'vm_nogc', 'cvm_nogc'),
                 in_c_key=False)
@@ -472,8 +471,7 @@ else:
    # g++ is not present or the user disabled it,
    # linker should default to python only.
    AddConfigVar('linker',
-                 ("Default linker used if the theano flags mode is Mode "
-                  "or ProfileMode(deprecated)"),
+                 "Default linker used if the theano flags mode is Mode ",
                 EnumStr('vm', 'py', 'vm_nogc'),
                 in_c_key=False)
    try:
@@ -501,8 +499,7 @@ AddConfigVar('allow_gc',
 # Keep the default optimizer the same as the one for the mode FAST_RUN
 AddConfigVar(
    'optimizer',
-    ("Default optimizer. If not None, will use this linker with the Mode "
-     "object (not ProfileMode(deprecated) or DebugMode)"),
+    "Default optimizer. If not None, will use this optimizer with the Mode ",
    EnumStr('fast_run', 'merge', 'fast_compile', 'None'),
    in_c_key=False)

@@ -951,27 +948,6 @@ AddConfigVar('NanGuardMode.action',
             EnumStr('raise', 'warn', 'pdb'),
             in_c_key=False)

-AddConfigVar('ProfileMode.n_apply_to_print',
-             "Number of apply instances to print by default",
-             IntParam(15, lambda i: i > 0),
-             in_c_key=False)
-
-AddConfigVar('ProfileMode.n_ops_to_print',
-             "Number of ops to print by default",
-             IntParam(20, lambda i: i > 0),
-             in_c_key=False)
-
-AddConfigVar('ProfileMode.min_memory_size',
-             "For the memory profile, do not print apply nodes if the size "
-             "of their outputs (in bytes) is lower then this threshold",
-             IntParam(1024, lambda i: i >= 0),
-             in_c_key=False)
-
-AddConfigVar('ProfileMode.profile_memory',
-             """Enable profiling of memory used by Theano functions""",
-             BoolParam(False),
-             in_c_key=False)
-
 AddConfigVar('optimizer_excluding',
             ("When using the default mode, we will remove optimizer with "
              "these tags. Separate tags with ':'."),

--- a/theano/d3viz/formatting.py
+++ b/theano/d3viz/formatting.py
@@ -11,7 +11,6 @@ from six import iteritems, itervalues

 import theano
 from theano import gof
-from theano.compile.profilemode import ProfileMode
 from theano.compile import Function
 from theano.compile import builders
 from theano.printing import pydot_imported, pydot_imported_msg
@@ -123,13 +122,6 @@ class PyDotFormatter(object):

        profile = None
        if isinstance(fct, Function):
-            mode = fct.maker.mode
-            if (not isinstance(mode, ProfileMode) or
-                    fct not in mode.profile_stats):
-                mode = None
-            if mode:
-                profile = mode.profile_stats[fct]
-            else:
            profile = getattr(fct, "profile", None)
            outputs = fct.maker.fgraph.outputs
            topo = fct.maker.fgraph.toposort()

--- a/theano/printing.py
+++ b/theano/printing.py
@@ -20,7 +20,6 @@ from theano import gof
 from theano import config
 from theano.gof import Op, Apply
 from theano.compile import Function, debugmode, SharedVariable
-from theano.compile.profilemode import ProfileMode

 pydot_imported = False
 pydot_imported_msg = ""
@@ -746,15 +745,10 @@ def pydotprint(fct, outfile=None,
                               config.device + '.' + format)

    if isinstance(fct, Function):
-        mode = fct.maker.mode
        profile = getattr(fct, "profile", None)
-        if (not isinstance(mode, ProfileMode) or
-                fct not in mode.profile_stats):
-                mode = None
        outputs = fct.maker.fgraph.outputs
        topo = fct.maker.fgraph.toposort()
    elif isinstance(fct, gof.FunctionGraph):
-        mode = None
        profile = None
        outputs = fct.outputs
        topo = fct.toposort()
@@ -767,7 +761,6 @@ def pydotprint(fct, outfile=None,
        assert all(isinstance(v, gof.Variable) for v in fct)
        fct = gof.FunctionGraph(inputs=gof.graph.inputs(fct),
                                outputs=fct)
-        mode = None
        profile = None
        outputs = fct.outputs
        topo = fct.toposort()
@@ -855,19 +848,7 @@ def pydotprint(fct, outfile=None,
        if node in apply_name_cache:
            return apply_name_cache[node], apply_name_id[node]
        prof_str = ''
-        if mode:
-            time = mode.profile_stats[fct].apply_time.get(node, 0)
-            # second, % total time in profiler, %fct time in profiler
-            if mode.local_time == 0:
-                pt = 0
-            else:
-                pt = time * 100 / mode.local_time
-            if mode.profile_stats[fct].fct_callcount == 0:
-                pf = 0
-            else:
-                pf = time * 100 / mode.profile_stats[fct].fct_call_time
-            prof_str = '   (%.3fs,%.3f%%,%.3f%%)' % (time, pt, pf)
-        elif profile:
+        if profile:
            time = profile.apply_time.get(node, 0)
            # second, %fct time in profiler
            if profile.fct_callcount == 0:

--- a/theano/sandbox/cuda/blas.py
+++ b/theano/sandbox/cuda/blas.py
@@ -907,7 +907,7 @@ class BaseGpuCorrMM(GpuOp):

    def flops(self, inp, outp):
        """
-        Useful with the hack in profilemode to print the MFlops.
+        Useful with the hack in profiling to print the MFlops.

        """
        # if the output shape is correct, then this gives the correct
@@ -1421,7 +1421,7 @@ class BaseGpuCorr3dMM(GpuOp):
            self.pad)

    def flops(self, inp, outp):
-        """ Useful with the hack in profilemode to print the MFlops"""
+        """ Useful with the hack in profiling to print the MFlops"""
        # if the output shape is correct, then this gives the correct
        # flops for any direction, sampling, padding, and border mode
        inputs, filters = inp
@@ -2101,7 +2101,7 @@ class GpuConv(GpuOp):
        return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()])

    def flops(self, inputs, outputs):
-        """ Useful with the hack in profilemode to print the MFlops"""
+        """ Useful with the hack in profiling to print the MFlops"""
        images, kerns = inputs
        out, = outputs
        assert images[1] == kerns[1]

--- a/theano/sandbox/cuda/tests/test_basic_ops.py
+++ b/theano/sandbox/cuda/tests/test_basic_ops.py
@@ -1370,12 +1370,12 @@ def speed_adv_sub1():
    vec = tensor.lvector()
    for batch_size in [100, 1000, 10000, 100000]:
        idx = numpy.random.randint(0, 50000, batch_size)
-        mode_with_gpu = theano.compile.ProfileMode().including('gpu')
-        f = theano.function([vec], var[vec], mode=mode_with_gpu)
+        mode_with_gpu = theano.compile.get_default_mode().including('gpu')
+        f = theano.function([vec], var[vec], mode=mode_with_gpu, profile=True)
        for i in range(100):
            f(idx)
-        print("ProfileMode with batch size", batch_size)
-        mode_with_gpu.print_summary()
+        print("profile with batch size", batch_size)
+        f.profile.summary()


 def speed_reduce10():

--- a/theano/sandbox/cuda/tests/test_mlp.py
+++ b/theano/sandbox/cuda/tests/test_mlp.py
@@ -19,7 +19,7 @@ import theano.sandbox.cuda as tcn
 import theano.tests.unittest_tools as utt


-if theano.config.mode not in ['FAST_RUN', 'Mode', 'ProfileMode']:
+if theano.config.mode not in ['FAST_RUN', 'Mode']:
    raise SkipTest('Skip test_mlp when not in normal optimization mode as '
                   'otherwise it is too slow!')

@@ -48,8 +48,6 @@ def get_mode(use_gpu, check_isfinite=True):
        ret = theano.compile.get_default_mode()
    else:
        ret = theano.compile.mode.get_mode('FAST_RUN')
-    if isinstance(ret, theano.compile.ProfileMode):
-        ret = copy.copy(ret)
    if isinstance(ret, theano.compile.DebugMode):
        ret = copy.copy(ret)
        ret.check_isfinite = check_isfinite
@@ -60,19 +58,6 @@ def get_mode(use_gpu, check_isfinite=True):
    return ret


-def print_mode(mode):
-    if mode is not None and isinstance(mode, (theano.compile.ProfileMode,)):
-        mode.print_summary()
-
-
-def print_diff_mode(a, b):
-    if (a is not None and
-        isinstance(a, (theano.compile.ProfileMode,)) and
-       isinstance(b, (theano.compile.ProfileMode,))):
-
-        a.print_diff_summary(b)
-
-
 def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10,
             n_train=100):

@@ -123,7 +108,6 @@ def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10,
        rval.append(train(xval, yval, lr))
    dt = time.time() - t0

-    print_mode(mode)
    return numpy.asarray(rval), dt


@@ -220,7 +204,6 @@ def run_conv_nnet1(use_gpu):
    for i in xrange(n_train):
        rval = train(xval, yval, lr)
    # print 'training done'
-    print_mode(mode)
    return rval


@@ -316,7 +299,6 @@ def run_conv_nnet2(use_gpu):  # pretend we are training LeNet for MNIST
    for i in xrange(n_train):
        rval = train(xval, yval, lr)

-    print_mode(mode)
    return rval


@@ -428,7 +410,6 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch,
 def run_conv_nnet2_classif(use_gpu, seed, isize, ksize, bsize,
                           n_train=10,
                           check_isfinite=True,
-                           pickle=False,
                           verbose=0,
                           version=-1):
    """Run the train function returned by build_conv_nnet2_classif on one device.
@@ -456,17 +437,6 @@ def run_conv_nnet2_classif(use_gpu, seed, isize, ksize, bsize,
    rvals = my_zeros(n_train)
    for i in xrange(n_train):
        rvals[i] = train(xval, yval, lr)[0]
-    print_mode(mode)
-
-    if pickle and isinstance(mode, theano.compile.ProfileMode):
-        import pickle
-        print("BEGIN %s profile mode dump" % device)
-        print(pickle.dumps(mode))
-        print("END %s profile mode dump" % device)
-
-    # print "%s time: %.3f" % (device, t1-t0)
-    # print "estimated time for one pass through MNIST with %s: %f" % (
-    #        device, (t1-t0) * (60000.0 / (n_train*bsize)))


 def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
@@ -476,7 +446,6 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
                               cpu_only=False,
                               float_atol=1e-06,
                               check_isfinite=True,
-                               pickle=False,
                               verbose=0,
                               version=-1):
    """Run the nnet2 function on 1 or 2 devices, and compares the results.
@@ -512,7 +481,6 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
                seed=seed, isize=isize, ksize=ksize, bsize=bsize,
                n_train=n_train,
                check_isfinite=check_isfinite,
-                pickle=pickle,
                verbose=verbose,
                version=version)


--- a/theano/scan_module/scan_op.py
+++ b/theano/scan_module/scan_op.py
@@ -175,7 +175,7 @@ class Scan(PureOp):

        mode_instance = compile.mode.get_mode(self.mode)
        # Clone mode_instance, altering "allow_gc" for the linker,
-        # and adding a message if the mode is a ProfileMode.
+        # and adding a message if we profile
        if self.name:
            message = self.name + " sub profile"
        else:
@@ -1564,14 +1564,6 @@ class Scan(PureOp):
            if hasattr(self.fn.fn, 'update_profile'):
                self.fn.fn.update_profile(profile)

-        #/* Old ProfileMode
-        # if hasattr(self.fn.maker.mode,'fct_call_time'):
-        #    self.fn.maker.mode.fct_call_time[self.fn] += t_fn
-        #    self.fn.maker.mode.fct_call[self.fn] += n_steps
-
-        #self.fn.maker.mode.call_time += t_fn
-        #self.fn.maker.mode.fn_time += t_fn
-        # Old Profile Mode */
        self.t_call = t_call
        self.t_fn = t_fn


--- a/theano/tensor/nnet/abstract_conv.py
+++ b/theano/tensor/nnet/abstract_conv.py
@@ -719,7 +719,7 @@ class BaseAbstractConv2d(Op):
        self.filter_dilation = tuple(filter_dilation)

    def flops(self, inp, outp):
-        """ Useful with the hack in profilemode to print the MFlops"""
+        """ Useful with the hack in profiling to print the MFlops"""
        # if the output shape is correct, then this gives the correct
        # flops for any direction, sampling, padding, and border mode
        inputs, filters = inp

--- a/theano/tensor/nnet/conv.py
+++ b/theano/tensor/nnet/conv.py
@@ -609,7 +609,7 @@ class ConvOp(OpenMPOp):

    def flops(self, inputs, outputs):
        """
-        Useful with the hack in profilemode to print the MFlops.
+        Useful with the hack in profiling to print the MFlops.

        """
        images, kerns = inputs

--- a/theano/tensor/tests/test_opt.py
+++ b/theano/tensor/tests/test_opt.py
@@ -1394,11 +1394,7 @@ class test_fusion(unittest.TestCase):

    def speed_log_exp(self):
        s = slice(31, 36)
-#        linker=gof.CLinker
-        linker = gof.OpWiseCLinker
-        mode = compile.Mode(linker(), copy.copy(compile.mode.OPT_FAST_RUN))
-        mode = compile.ProfileMode()
-        print("time", self.do(mode, shared, shp=(1000, 1000), gpu=False,
+        print("time", self.do(None, shared, shp=(1000, 1000), gpu=False,
                              assert_len_topo=False, slice=s, nb_repeat=100))

    def tes_memory_leak(self, mode=compile.mode.Mode('c', 'merge'),

--- a/theano/tests/test_printing.py
+++ b/theano/tests/test_printing.py
@@ -115,15 +115,19 @@ def test_pydotprint_long_name():


 def test_pydotprint_profile():
-    """Just check that pydotprint does not crash with ProfileMode."""
+    """Just check that pydotprint does not crash with profile."""

    # Skip test if pydot is not available.
    if not theano.printing.pydot_imported:
        raise SkipTest('pydot not available')

    A = tensor.matrix()
-    f = theano.function([A], A + 1, mode='ProfileMode')
+    prof = theano.compile.ProfileStats(atexit_print=False)
+    f = theano.function([A], A + 1, profile=prof)
    theano.printing.pydotprint(f, print_output_file=False)
+    f([[1]])
+    theano.printing.pydotprint(f, print_output_file=False)
+


 def test_min_informative_str():