提交 5bdb4a0b authored 作者: abergeron's avatar abergeron 提交者: GitHub

Merge pull request #5054 from nouiz/profile_mode

Remove ProfileMode
...@@ -34,7 +34,6 @@ ...@@ -34,7 +34,6 @@
<Compile Include="theano\compile\mode.py" /> <Compile Include="theano\compile\mode.py" />
<Compile Include="theano\compile\module.py" /> <Compile Include="theano\compile\module.py" />
<Compile Include="theano\compile\pfunc.py" /> <Compile Include="theano\compile\pfunc.py" />
<Compile Include="theano\compile\profilemode.py" />
<Compile Include="theano\compile\profiling.py" /> <Compile Include="theano\compile\profiling.py" />
<Compile Include="theano\compile\sandbox\__init__.py" /> <Compile Include="theano\compile\sandbox\__init__.py" />
<Compile Include="theano\compile\sharedvalue.py" /> <Compile Include="theano\compile\sharedvalue.py" />
......
...@@ -8,7 +8,7 @@ import theano ...@@ -8,7 +8,7 @@ import theano
import theano.tensor as T import theano.tensor as T
import theano.sandbox import theano.sandbox
from six.moves import xrange from six.moves import xrange
from theano.compile import module, Mode, ProfileMode from theano.compile import module, Mode
from theano import gof, Op, Apply from theano import gof, Op, Apply
from theano.tensor import blas, opt from theano.tensor import blas, opt
...@@ -191,7 +191,6 @@ class M(module.Module): ...@@ -191,7 +191,6 @@ class M(module.Module):
mod = M() mod = M()
mode = 'FAST_RUN' mode = 'FAST_RUN'
#mode = ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
mode = Mode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker(nice_errors=True)) mode = Mode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker(nice_errors=True))
mode = Mode(optimizer='fast_run', linker='c') mode = Mode(optimizer='fast_run', linker='c')
mode = Mode(optimizer='fast_run', linker='c|py') mode = Mode(optimizer='fast_run', linker='c|py')
......
...@@ -91,7 +91,6 @@ class PrintEverythingMode(theano.Mode): ...@@ -91,7 +91,6 @@ class PrintEverythingMode(theano.Mode):
def test_module_advanced_example(): def test_module_advanced_example():
profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
profmode = PrintEverythingMode(theano.gof.OpWiseCLinker(), 'fast_run') profmode = PrintEverythingMode(theano.gof.OpWiseCLinker(), 'fast_run')
data_x = N.random.randn(4, 10) data_x = N.random.randn(4, 10)
......
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
ops ops
mode mode
debugmode debugmode
profilemode
nanguardmode nanguardmode
......
...@@ -21,10 +21,8 @@ Theano defines the following modes by name: ...@@ -21,10 +21,8 @@ Theano defines the following modes by name:
- ``'FAST_COMPILE'``: Apply just a few graph optimizations and only use Python implementations. - ``'FAST_COMPILE'``: Apply just a few graph optimizations and only use Python implementations.
- ``'FAST_RUN'``: Apply all optimizations, and use C implementations where possible. - ``'FAST_RUN'``: Apply all optimizations, and use C implementations where possible.
- ``'DebugMode'``: A mode for debugging. See :ref:`DebugMode <debugmode>` for details. - ``'DebugMode'``: A mode for debugging. See :ref:`DebugMode <debugmode>` for details.
- ``'ProfileMode'``: Deprecated, use the Theano flag :attr:`config.profile`.
- ``'NanGuardMode'``: :ref:`Nan detector <nanguardmode>` - ``'NanGuardMode'``: :ref:`Nan detector <nanguardmode>`
- ``'DEBUG_MODE'``: Deprecated. Use the string DebugMode. - ``'DEBUG_MODE'``: Deprecated. Use the string DebugMode.
- ``'PROFILE_MODE'``: Deprecated, use the Theano flag :attr:`config.profile`.
The default mode is typically ``FAST_RUN``, but it can be controlled via the The default mode is typically ``FAST_RUN``, but it can be controlled via the
configuration variable :attr:`config.mode`, which can be configuration variable :attr:`config.mode`, which can be
......
:orphan:
.. _profilemode: .. _profilemode:
...@@ -16,203 +17,4 @@ Guide ...@@ -16,203 +17,4 @@ Guide
.. note:: .. note::
ProfileMode is deprecated. Use :attr:`config.profile` instead. ProfileMode is removed. Use :attr:`config.profile` instead.
To profile a Theano graph, a special mode called ProfileMode, must be passed as
an argument when compiling your graph. Using ProfileMode is a three-step
process.
Creating a ProfileMode Instance
-------------------------------
First create a ProfileMode instance.
>>> import theano
>>> from theano import ProfileMode
>>> profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
The ProfileMode constructor takes as input an optimizer and a
linker. Which optimizer and linker to use will depend on the
application. For example, a user wanting to profile the Python
implementation only, should use the gof.PerformLinker (or "py" for
short). On the other hand, a user wanting to profile his graph using C
implementations wherever possible should use the ``gof.OpWiseCLinker``
(or "c|py").
In the same manner, modifying which optimizer is passed to ProfileMode
will decide which optimizations are applied to the graph, prior to
profiling. Changing the optimizer should be especially useful when
developing new graph optimizations, in order to evaluate their impact
on performance. Also keep in mind that optimizations might change the
computation graph a lot, meaning that you might not recognize some of
the operations that are profiled (you did not use them explicitly but
an optimizer decided to use it to improve performance or numerical
stability). If you cannot easily relate the output of ProfileMode with
the computations you defined, you might want to try setting optimizer
to None (but keep in mind the computations will be slower than if they
were optimized).
Note that most users will want to use ProfileMode to optimize their
graph and find where most of the computation time is being spent. In
this context, 'fast_run' optimizer and ``gof.OpWiseCLinker`` are the
most appropriate choices.
Compiling your Graph with ProfileMode
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Once the ProfileMode instance is created, simply compile your graph as you
would normally, by specifying the mode parameter.
.. testsetup::
import theano
input1, input2 = theano.tensor.scalars(2)
output1 = input1+input2
>>> # with functions
>>> f = theano.function([input1,input2],[output1], mode=profmode)
Retrieving Timing Information
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Once your graph is compiled, simply run the program or operation you wish to
profile, then call ``profmode.print_summary()``. This will provide you with
the desired timing information, indicating where your graph is spending most
of its time.
This is best shown through an example.
Let's use the example of logistic
regression. (Code for this example is in the file
``benchmark/regression/regression.py``.)
Compiling the module with ProfileMode and calling ``profmode.print_summary()``
generates the following output:
.. code-block:: python
"""
ProfileMode.print_summary()
---------------------------
local_time 0.0749197006226 (Time spent running thunks)
Apply-wise summary: <fraction of local_time spent at this position> (<Apply position>, <Apply Op name>)
0.069 15 _dot22
0.064 1 _dot22
0.053 0 InplaceDimShuffle{x,0}
0.049 2 InplaceDimShuffle{1,0}
0.049 10 mul
0.049 6 Elemwise{ScalarSigmoid{output_types_preference=<theano.scalar.basic.transfer_type object at 0x171e650>}}[(0, 0)]
0.049 3 InplaceDimShuffle{x}
0.049 4 InplaceDimShuffle{x,x}
0.048 14 Sum{0}
0.047 7 sub
0.046 17 mul
0.045 9 sqr
0.045 8 Elemwise{sub}
0.045 16 Sum
0.044 18 mul
... (remaining 6 Apply instances account for 0.25 of the runtime)
Op-wise summary: <fraction of local_time spent on this kind of Op> <Op name>
0.139 * mul
0.134 * _dot22
0.092 * sub
0.085 * Elemwise{Sub{output_types_preference=<theano.scalar.basic.transfer_type object at 0x1779f10>}}[(0, 0)]
0.053 * InplaceDimShuffle{x,0}
0.049 * InplaceDimShuffle{1,0}
0.049 * Elemwise{ScalarSigmoid{output_types_preference=<theano.scalar.basic.transfer_type object at 0x171e650>}}[(0, 0)]
0.049 * InplaceDimShuffle{x}
0.049 * InplaceDimShuffle{x,x}
0.048 * Sum{0}
0.045 * sqr
0.045 * Sum
0.043 * Sum{1}
0.042 * Elemwise{Mul{output_types_preference=<theano.scalar.basic.transfer_type object at 0x17a0f50>}}[(0, 1)]
0.041 * Elemwise{Add{output_types_preference=<theano.scalar.basic.transfer_type object at 0x1736a50>}}[(0, 0)]
0.039 * Elemwise{Second{output_types_preference=<theano.scalar.basic.transfer_type object at 0x1736d90>}}[(0, 1)]
... (remaining 0 Ops account for 0.00 of the runtime)
(*) Op is running a c implementation
"""
.. note::
***TODO***
The following text was recovered from a recent version of the source
file... hopefully things haven't gotten too out-of-sync!
The first shows an Apply-wise summary, the second shows an Op-wise summary, and the third shows a type-Op-wise summary.
The Apply-wise summary prints the timing information for the worst
offending Apply nodes. This corresponds to individual Op applications
within your graph which take the longest to execute (so if you use dot
twice, you will see two entries there).
The Op-wise summary prints the execution time per Op: all Apply nodes
executing the same Op are grouped together and the total execution
time per Op is shown (so if you use dot twice, you will see only one
entry there corresponding to the sum of the time spent in each of
them). If two Ops have different hash values, they will be listed separately.
The type-Op-wise summary groups the results by type of Op. So even if
two Ops have different hash values, they will be merged.
There is a hack in the Op-wise summary. Go see it if you want to know more.
The summary has two components to it. In the first section called the
Apply-wise summary, timing information is provided for the worst
offending Apply nodes. This corresponds to individual Op applications
within your graph which take the longest to execute (so if you use
``dot`` twice, you will see two entries there). In the second portion,
the Op-wise summary, the execution time of all Apply nodes executing
the same Op are grouped together and the total execution time per Op
is shown (so if you use ``dot`` twice, you will see only one entry
there corresponding to the sum of the time spent in each of them).
Note that the ProfileMode also shows which Ops were running a c
implementation.
Developers wishing to optimize the performance of their graph should
focus on the worst offending Ops and Apply nodes -- either by optimizing an
implementation, providing a missing C implementation, or by writing a graph
optimization that eliminates the offending Op altogether.
You should strongly consider emailing one of our lists about your issue before
spending too much time on this.
Reference
=========
.. class:: ProfileMode(Mode)
.. method:: print_summary(n_apply_to_print=None, n_ops_to_print=None)
Print three summaries to stdout that show where cpu time is spent during theano function executions (for all functions using this object instance).
:param n_apply_to_print: the number of apply nodes to print.
The default is 15, but it can be configured via ``ProfileMode.n_apply_to_print`` in :envvar:`THEANO_FLAGS`.
:param n_ops_to_print: the number of ops to print.
The default is 20, but it can be configured via ``ProfileMode.n_ops_to_print`` in :envvar:`THEANO_FLAGS`.
:returns: None
.. method:: print_diff_summary(self, other, n_apply_to_print=None, n_ops_to_print=None):
""" As print_summary, but print the difference between two different ProfileMode instances.
TODO: Also we don't print the Apply-wise summary as it doesn't work for now.
TODO: make comparison with gpu code.
:param other: the other instance of ProfileMode that we want to be compared to.
:param n_apply_to_print: the number of apply nodes to print.
The default is 15, but it can be configured via ``ProfileMode.n_apply_to_print`` in :envvar:`THEANO_FLAGS`.
:param n_ops_to_print: the number of ops to print.
The default is 20, but it can be configured via ``ProfileMode.n_ops_to_print`` in :envvar:`THEANO_FLAGS`.
:returns: None
...@@ -315,7 +315,7 @@ import theano and print the config variable, as in: ...@@ -315,7 +315,7 @@ import theano and print the config variable, as in:
.. attribute:: mode .. attribute:: mode
String value: ``'Mode'``, ``'ProfileMode'`` (deprecated), ``'DebugMode'``, ``'FAST_RUN'``, String value: ``'Mode'``, ``'DebugMode'``, ``'FAST_RUN'``,
``'FAST_COMPILE'`` ``'FAST_COMPILE'``
Default: ``'Mode'`` Default: ``'Mode'``
......
...@@ -284,7 +284,7 @@ First, make sure you're running in ``FAST_RUN`` mode. Even though ...@@ -284,7 +284,7 @@ First, make sure you're running in ``FAST_RUN`` mode. Even though
to ``theano.function`` (or ``theano.make``) or by setting :attr:`config.mode` to ``theano.function`` (or ``theano.make``) or by setting :attr:`config.mode`
to ``FAST_RUN``. to ``FAST_RUN``.
Second, try the Theano :ref:`using_profilemode`. This will tell you which Second, try the Theano :ref:`profiling <tut_profiling>`. This will tell you which
``Apply`` nodes, and which ops are eating up your CPU cycles. ``Apply`` nodes, and which ops are eating up your CPU cycles.
Tips: Tips:
......
...@@ -248,13 +248,3 @@ constructor arguments. The keyword version of DebugMode (which you get by using ...@@ -248,13 +248,3 @@ constructor arguments. The keyword version of DebugMode (which you get by using
is quite strict. is quite strict.
For more detail, see :ref:`DebugMode<debugmode>` in the library. For more detail, see :ref:`DebugMode<debugmode>` in the library.
.. _using_profilemode:
ProfileMode
===========
.. note::
ProfileMode is deprecated. Use :attr:`config.profile` instead.
...@@ -27,7 +27,7 @@ functions using either of the following two options: ...@@ -27,7 +27,7 @@ functions using either of the following two options:
:attr:`profiling.n_ops` and :attr:`profiling.min_memory_size` :attr:`profiling.n_ops` and :attr:`profiling.min_memory_size`
to modify the quantity of information printed. to modify the quantity of information printed.
2. Pass the argument :attr:`profile=True` to the function :func:`theano.function <function.function>`. And then call :attr:`f.profile.print_summary()` for a single function. 2. Pass the argument :attr:`profile=True` to the function :func:`theano.function <function.function>`. And then call :attr:`f.profile.summary()` for a single function.
- Use this option when you want to profile not all the - Use this option when you want to profile not all the
functions but one or more specific function(s). functions but one or more specific function(s).
- You can also combine the profile of many functions: - You can also combine the profile of many functions:
...@@ -39,7 +39,7 @@ functions using either of the following two options: ...@@ -39,7 +39,7 @@ functions using either of the following two options:
f = theano.function(..., profile=profile) # doctest: +SKIP f = theano.function(..., profile=profile) # doctest: +SKIP
g = theano.function(..., profile=profile) # doctest: +SKIP g = theano.function(..., profile=profile) # doctest: +SKIP
... # doctest: +SKIP ... # doctest: +SKIP
profile.print_summary() profile.summary()
......
...@@ -73,7 +73,7 @@ from theano.compile import ( ...@@ -73,7 +73,7 @@ from theano.compile import (
Mode, Mode,
predefined_modes, predefined_linkers, predefined_optimizers, predefined_modes, predefined_linkers, predefined_optimizers,
FunctionMaker, function, function_dump, OpFromGraph, FunctionMaker, function, function_dump, OpFromGraph,
ProfileMode, ProfileStats, ProfileStats,
Param, shared, as_op) Param, shared, as_op)
from theano.misc.safe_asarray import _asarray from theano.misc.safe_asarray import _asarray
......
...@@ -19,8 +19,6 @@ from theano.compile.monitormode import MonitorMode ...@@ -19,8 +19,6 @@ from theano.compile.monitormode import MonitorMode
from theano.compile.profiling import ProfileStats, ScanProfileStats from theano.compile.profiling import ProfileStats, ScanProfileStats
from theano.compile.profilemode import ProfileMode
from theano.compile.sharedvalue import (shared, shared_constructor, from theano.compile.sharedvalue import (shared, shared_constructor,
SharedVariable) SharedVariable)
from theano.compile.pfunc import pfunc, Param, rebuild_collect_shared from theano.compile.pfunc import pfunc, Param, rebuild_collect_shared
......
...@@ -1378,17 +1378,11 @@ class FunctionMaker(object): ...@@ -1378,17 +1378,11 @@ class FunctionMaker(object):
output_keys=None): output_keys=None):
mode = theano.compile.mode.get_mode(mode) mode = theano.compile.mode.get_mode(mode)
# figure out which profile object to use (if any) # Assert old way of working isn't used
# to help with forward-porting ProfileMode, if getattr(mode, 'profile', None):
# we allow ProfileMode to provide a ProfileStats object
# using this somewhat awkward mechanism.
mode_profile = getattr(mode, 'profile', None)
if (profile is not None and
profile is not False and
mode_profile is not None):
raise TypeError( raise TypeError(
'profile passed via both "mode" and "profile" arguments') "profile passed via 'mode'. This isn't supported anymore")
self.profile = profile = profile or mode_profile self.profile = profile
if profile: if profile:
# This is very important: # This is very important:
# 1) We preload the cache here to don't have its timming # 1) We preload the cache here to don't have its timming
...@@ -1745,9 +1739,6 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False, ...@@ -1745,9 +1739,6 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False,
- FAST_COMPILE (minimal optimization) - FAST_COMPILE (minimal optimization)
- ProfileMode(deprecated): allow to print a profile mode with
mode.print_summary
- DebugMode: verify many internal conditions that are normally assumed - DebugMode: verify many internal conditions that are normally assumed
(slow) (slow)
......
...@@ -391,7 +391,7 @@ def get_mode(orig_string): ...@@ -391,7 +391,7 @@ def get_mode(orig_string):
default_mode_class): default_mode_class):
return instantiated_default_mode return instantiated_default_mode
if string in ['Mode', 'ProfileMode', 'DebugMode', 'NanGuardMode']: if string in ['Mode', 'DebugMode', 'NanGuardMode']:
if string == 'DebugMode': if string == 'DebugMode':
# need to import later to break circular dependency. # need to import later to break circular dependency.
from .debugmode import DebugMode from .debugmode import DebugMode
...@@ -403,9 +403,6 @@ def get_mode(orig_string): ...@@ -403,9 +403,6 @@ def get_mode(orig_string):
# NanGuardMode use its own linker. # NanGuardMode use its own linker.
ret = NanGuardMode(True, True, True, optimizer=config.optimizer) ret = NanGuardMode(True, True, True, optimizer=config.optimizer)
else: else:
# This might be required if the string is 'ProfileMode'
from .profilemode import ProfileMode # noqa
from .profilemode import prof_mode_instance_to_print
# TODO: Can't we look up the name and invoke it rather than using eval here? # TODO: Can't we look up the name and invoke it rather than using eval here?
ret = eval(string + ret = eval(string +
'(linker=config.linker, optimizer=config.optimizer)') '(linker=config.linker, optimizer=config.optimizer)')
...@@ -424,11 +421,6 @@ def get_mode(orig_string): ...@@ -424,11 +421,6 @@ def get_mode(orig_string):
ret = ret.requiring(*theano.config.optimizer_requiring.split(':')) ret = ret.requiring(*theano.config.optimizer_requiring.split(':'))
instantiated_default_mode = ret instantiated_default_mode = ret
# must tell python to print the summary at the end.
if string == 'ProfileMode':
# need to import later to break circular dependency.
prof_mode_instance_to_print.append(ret)
return ret return ret
......
...@@ -79,7 +79,7 @@ class MonitorMode(Mode): ...@@ -79,7 +79,7 @@ class MonitorMode(Mode):
Create a new instance of this Mode. Create a new instance of this Mode.
Keyword arguments can be provided for the linker, but they will be Keyword arguments can be provided for the linker, but they will be
ignored, because ProfileMode needs to use its own linker. ignored, because MonitorMode needs to use its own linker.
""" """
if optimizer == "": if optimizer == "":
......
...@@ -366,6 +366,8 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None, ...@@ -366,6 +366,8 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
if profile is None: if profile is None:
profile = config.profile profile = config.profile
# profile -> True or False # profile -> True or False
if profile is False:
profile = None
if profile is True: if profile is True:
profile = ProfileStats(message=name) profile = ProfileStats(message=name)
# profile -> object # profile -> object
......
from __future__ import absolute_import, print_function, division
import atexit
import copy
import os
import time
import warnings
import theano
from theano.gof.link import WrapLinker
from six import string_types, iteritems, itervalues
from theano.compile.mode import (Mode, register_mode,
predefined_modes, predefined_linkers,
predefined_optimizers)
from theano.configparser import config
from theano.compile.function_module import FunctionMaker
from .profiling import ProfileStats
# Placeholder for the C-thunk runner. Profile_Maker.create replaces it with
# theano.gof.cutils.run_cthunk the first time a profiled function contains
# an Apply node that has a C implementation.
run_cthunk = None # Will be imported only when needed.
# Wall-clock time at which this module was imported; print_summary_ reports
# "time since import" relative to this value.
import_time = time.time()
class Profile_Maker(FunctionMaker):
    """
    FunctionMaker used by ProfileMode: every function it creates gets its
    own ProfileStats object and a wrapped `fn` that exposes those stats on
    the mode while the function runs.
    """

    def create(self, input_storage=None, trustme=False, storage_map=None):
        """
        Create the function, then instrument it for profiling.

        The arguments are forwarded unchanged to `FunctionMaker.create`.

        Raises
        ------
        Exception
            If the CUDA backend is enabled and the environment variable
            CUDA_LAUNCH_BLOCKING is not set to '1'.

        Returns
        -------
        The created function, with a `profile` attribute holding its
        ProfileStats and with `fn` replaced by a profiling wrapper.

        """
        global run_cthunk

        compiled = super(Profile_Maker, self).create(input_storage, trustme,
                                                     storage_map)

        # Refuse to profile with the CUDA backend unless kernel launches
        # are forced to be synchronous.
        cuda_active = (hasattr(theano, 'sandbox') and
                       hasattr(theano.sandbox, 'cuda') and
                       theano.sandbox.cuda.cuda_enabled)
        if cuda_active and os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1':
            raise Exception(
                "You are running the Theano profiler with CUDA enabled."
                " Theano GPU ops execution is asynchronous by default."
                " So by default, the profile is useless."
                " You must set the environment variable"
                " CUDA_LAUNCH_BLOCKING to 1 to tell the CUDA driver to"
                " synchronize the execution to get a meaningful profile.")

        # One ProfileStats per created function, reachable both from the
        # mode (keyed by function) and from the function itself.
        stats = ProfileStats(atexit_print=False)
        self.mode.profile_stats[compiled] = stats
        compiled.profile = stats

        # Start every Apply node at zero and record whether it runs C code.
        for position, node in enumerate(compiled.maker.fgraph.toposort()):
            stats.apply_time[node] = 0.0

            # A thunk group lists the thunks of each linker for this
            # position of the toposort; exactly one linker is expected.
            group = compiled.fn.thunk_groups[position]
            assert len(group) == 1
            stats.apply_cimpl[node] = hasattr(group[0], 'cthunk')

        # WrapLinker generates functions but is not function-specific,
        # while ProfileStats is. Bridge the two by swapping in a wrapper
        # that points the mode at this function's stats during the call.
        # The original fn is captured first because new_fn takes its place.
        wrapped_fn = compiled.fn

        def new_fn():
            per_fn = self.mode.profile_stats[compiled]
            self.mode.apply_time = per_fn.apply_time
            self.mode.variable_shape = per_fn.variable_shape
            wrapped_fn()
            # Remove the temporary attributes: outside of a call they no
            # longer mean anything, and leaving them would let old code
            # look like it still works.
            del self.mode.apply_time
            del self.mode.variable_shape

        compiled.fn = new_fn

        if run_cthunk is None and any(stats.apply_cimpl.values()):
            # Lazy import to avoid compilation when importing theano.
            from theano.gof.cutils import run_cthunk  # noqa

        warnings.warn(
            "DEPRECATION WARNING: The ProfileMode is deprecated. "
            "Use the Theano flags/parameter to theano.function "
            "'profile=True' instead of 'mode=ProfileMode'")

        return compiled
class ProfileMode(Mode):
def __init__(self, linker=None, optimizer='default'):
    """
    Build a ProfileMode with an empty message and no recorded stats.

    Parameters
    ----------
    linker
        Linker to profile with. None selects `config.linker`.
    optimizer
        Optimizer to apply. The string 'default' selects
        `config.optimizer`.

    """
    if linker is None:
        linker = config.linker
    # Compare by value: `is` against a str literal only succeeds when the
    # caller's string happens to be the same interned object.
    if optimizer == 'default':
        optimizer = config.optimizer
    message = ""
    profile_stats = {}
    # All real initialisation lives in __setstate__ so that construction
    # and unpickling share one code path.
    self.__setstate__((linker,
                       optimizer,
                       message,
                       profile_stats))
def function_maker(self, i, o, m, *args, **kwargs):
    """
    Return a `Profile_Maker` bound to this mode.

    `i`, `o` and any extra arguments are forwarded to `Profile_Maker`;
    `m` must be this very mode instance.
    """
    assert m is self
    maker = Profile_Maker(i, o, self, *args, **kwargs)
    return maker
def __get_local_time(self):
    """Return the apply times summed over every recorded ProfileStats."""
    return sum(sum(stats.apply_time.values())
               for stats in itervalues(self.profile_stats))
local_time = property(__get_local_time)
def __getstate__(self):
    """
    Return the picklable state as the tuple
    `(provided_linker, provided_optimizer, message, profile_stats)`,
    the same layout that `__setstate__` unpacks.
    """
    state = (self.provided_linker, self.provided_optimizer,
             self.message, self.profile_stats)
    return state
def __setstate__(self, state):
    """
    Initialise the mode from a state tuple.

    Parameters
    ----------
    state
        Tuple `(linker, optimizer, message, profile_stats)`. `linker` and
        `optimizer` may be objects, strings, or None; strings and None are
        looked up in `predefined_linkers` / `predefined_optimizers`.

    """
    linker, optimizer, message, profile_stats = state
    self.message = message
    self.profile_stats = profile_stats

    def profile_thunk(i, node, th):
        """
        Profile only the execution time.

        """
        global run_cthunk
        if not hasattr(th, 'cthunk'):
            start = time.time()
            th()
            elapsed = time.time() - start
        else:
            start = time.time()
            failure = run_cthunk(th.cthunk)
            elapsed = time.time() - start
            if failure:
                raise RuntimeError(
                    ('A C Op raised an exception. ProfileMode cannot'
                     ' tell you what it was though. Use a standard mode'
                     ' such as FAST_RUN to correct the problem.'))

        # Some Ops finish faster than time.time() can resolve, so a tiny
        # epsilon is used as the minimum recorded duration.
        self.apply_time[node] += max(elapsed, 1e-14)

    def profile_thunk2(i, node, th):
        """
        Profile the execution time and the memory size.

        """
        global run_cthunk
        if not hasattr(th, 'cthunk'):
            start = time.time()
            th()
            elapsed = time.time() - start
        else:
            start = time.time()
            failure = run_cthunk(th.cthunk)
            elapsed = time.time() - start
            if failure:
                raise RuntimeError(
                    ('A C Op raised an exception. ProfileMode cannot'
                     ' tell you what it was though. Use a standard mode'
                     ' such as FAST_RUN to correct the problem.'))

        # Remember the shape of every output of this node.
        for var, data in zip(node.outputs, th.outputs):
            shape = getattr(data[0], 'shape', 'input no shape')
            self.variable_shape[var] = shape

        self.apply_time[node] += max(elapsed, 1e-14)

    # Keep what the caller supplied, before any lookup or wrapping, so that
    # __getstate__ can hand it back.
    self.provided_linker = linker
    self.provided_optimizer = optimizer

    if linker is None or isinstance(linker, string_types):
        linker = predefined_linkers[linker]

    # The memory-aware thunk is used only when the config flag asks for it.
    if config.ProfileMode.profile_memory:
        p_thunk = profile_thunk2
    else:
        p_thunk = profile_thunk
    self.linker = WrapLinker([linker], p_thunk)

    if optimizer is None or isinstance(optimizer, string_types):
        optimizer = predefined_optimizers[optimizer]
    self._optimizer = optimizer

    self.call_time = 0
    self.fn_time = 0
def print_summary(self, **kwargs):
    """
    Print three summaries that show where time is spent: an Apply-wise
    summary, an Op-wise summary and a type-Op-wise summary.

    The Apply-wise summary prints the timing information for the worst
    offending Apply nodes, i.e. the individual Op applications within
    your graph which take the longest to execute (so if you use dot
    twice, you will see two entries there).

    The Op-wise summary groups together all Apply nodes executing the
    same Op and shows the total execution time per Op (so if you use dot
    twice, you will see only one entry, holding the sum of the time
    spent in each of them). Two Ops with different hash values are
    listed separately.

    The type-Op-wise summary groups the results by type of Op, so even
    Ops with different hash values are merged.

    There is a hack in the Op-wise summary. Go see it if you want to
    know more.

    Parameters
    ----------
    kwargs
        They are passed to print_summary_ expanded. Currently there is
        n_apply_to_print, n_ops_to_print and min_memory_size that are
        accepted.

    """
    stats_by_fn = self.profile_stats

    compile_time = sum(ps.compile_time for ps in stats_by_fn.values())

    fct_call = dict((fn, ps.fct_callcount)
                    for fn, ps in iteritems(stats_by_fn))
    fct_call_time = dict((fn, ps.fct_call_time)
                         for fn, ps in iteritems(stats_by_fn))

    # Key the apply times by (toposort position, node).
    apply_time = {}
    for fn, ps in iteritems(stats_by_fn):
        for position, node in enumerate(fn.maker.fgraph.toposort()):
            apply_time[(position, node)] = ps.apply_time[node]

    # Print every Apply node whose recorded time is exactly zero.
    for (position, node), elapsed in iteritems(apply_time):
        if elapsed == 0:
            print(position, node)

    apply_cimpl = {}
    for ps in itervalues(stats_by_fn):
        apply_cimpl.update(ps.apply_cimpl)

    message = self.message

    variable_shape = {}
    for ps in itervalues(stats_by_fn):
        variable_shape.update(ps.variable_shape)

    linker_time = sum(ps.linker_time for ps in stats_by_fn.values())
    optimizer_time = sum(ps.optimizer_time for ps in stats_by_fn.values())
    other_time = {'linker_time': linker_time,
                  'optimizer_time': optimizer_time}

    self.print_summary_("print_summary",
                        compile_time, fct_call_time, fct_call,
                        apply_time, apply_cimpl, message, variable_shape,
                        self.local_time, other_time,
                        **kwargs)
def print_diff_summary(self, other, **kwargs):
"""
As print_summary, but print the difference between two different
ProfileMode instances.
TODO: Also we don't print the Apply-wise summary as it doesn't
work for now.
TODO: make comparison with gpu code.
Parameters
----------
other
The other instance of ProfileMode that we want to be compared to.
kwargs
They are passed to print_summary_ expanded.
Currently there is n_apply_to_print, n_ops_to_print and
min_memory_size that are accepted.
"""
# Helper: key-wise difference of two {key: number} dicts.
# Keys of a_time get `a - b`, with a missing b entry counted as 0.
# NOTE(review): keys found only in b_time_ are added with a positive
# sign rather than subtracted -- confirm this is intended.
def diff_dict(a_time, b_time_):
r = {}
# Shallow copy so that pop() below does not mutate the caller's dict.
b_time = copy.copy(b_time_)
for a, ta in iteritems(a_time):
r.setdefault(a, 0)
tb = b_time.pop(a, 0)
r[a] += ta - tb
# they are missing in a
for a, t in iteritems(b_time):
r.setdefault(a, 0)
r[a] += t
return r
# NOTE(review): compile_time, fct_call_time, fct_call and apply_cimpl are
# read from the mode objects here, but nothing in the visible part of this
# class assigns them (print_summary derives them from profile_stats).
# apply_time and variable_shape are only attached to the mode by
# Profile_Maker's wrapper while a function is running. Presumably this
# method predates the per-function ProfileStats -- verify before relying
# on it.
compile_time = self.compile_time - other.compile_time
fct_call_time = diff_dict(self.fct_call_time, other.fct_call_time)
fct_call = diff_dict(self.fct_call, other.fct_call)
apply_time = diff_dict(self.apply_time, other.apply_time)
apply_cimpl = self.apply_cimpl and other.apply_cimpl
message = self.message
variable_shape = diff_dict(self.variable_shape, other.variable_shape)
# Linker and optimizer times are summed over every profiled function of
# each mode, then subtracted.
self_linker_time = sum([ps.linker_time for ps
in self.profile_stats.values()])
other_linker_time = sum([ps.linker_time for ps
in other.profile_stats.values()])
self_optimizer_time = sum([ps.optimizer_time for ps
in self.profile_stats.values()])
other_optimizer_time = sum([ps.optimizer_time for ps
in other.profile_stats.values()])
other_time = {'linker_time': self_linker_time - other_linker_time,
'optimizer_time': self_optimizer_time -
other_optimizer_time}
# NOTE(review): print_summary_ declares `local_time` as a required
# parameter without a default, and this call does not supply it (unless
# the caller passes it through kwargs).
self.print_summary_("print_diff_summary", compile_time,
fct_call_time, fct_call,
apply_time, apply_cimpl, message, variable_shape,
print_apply=False, other_time=other_time,
**kwargs)
@staticmethod
def print_summary_(fct_name, compile_time, fct_call_time, fct_call,
apply_time, apply_cimpl, message, variable_shape,
local_time, other_time,
n_apply_to_print=config.ProfileMode.n_apply_to_print,
n_ops_to_print=config.ProfileMode.n_ops_to_print,
print_apply=True,
min_memory_size=config.ProfileMode.min_memory_size,
):
"""
Do the actual printing of print_summary and print_diff_summary.

Everything is written to standard output with print(): a deprecation
notice, global timings, a per-function summary, a per-Op-class
summary, a per-Op summary, optionally a per-Apply summary, the output
of every registered profiler printer, and finally a list of tips.

Parameters
----------
fct_name
Name of the calling method; only used in the printed title.
compile_time
Total compile time in seconds.
fct_call_time
Dict mapping each function to the time spent calling it.
fct_call
Dict mapping each function to its number of calls. Keys must
provide `name` and `maker.fgraph`.
apply_time
Dict mapping (position, apply node) pairs to the time spent in
that apply node.
apply_cimpl
Dict-like queried with `.get(apply node, False)`; truthy when the
node ran a C implementation.
message
Text printed in the title.
variable_shape
Only tested for truthiness here and forwarded to the registered
profiler printers.
local_time
Total time spent in Ops; used as the denominator of the
percentages.
other_time
Dict with the keys 'optimizer_time' and 'linker_time'.
n_apply_to_print
The number of apply to print. Default 15.
n_ops_to_print
The number of ops to print. Default 20.
print_apply
If True, print the Apply-wise summary.
min_memory_size
Don't print memory profile of apply whose outputs memory size is
lower than that. Not referenced in the body of this function.
"""
# Deprecation notice, always printed first.
print("ProfileMode is deprecated! Use the new profiler.")
print(" The Theano flags to enable it ise: profile=True")
print(" The Theano flags for the memory profile to it is: "
"profile_memory=True")
# Wall-clock time elapsed since import_time (a name defined outside
# this function).
total_time = time.time() - import_time
total_fct_time = sum(fct_call_time.values())
total_fct_call = sum(fct_call.values())
# Time that is neither compilation nor function calls.
unknown_time = total_time - total_fct_time - compile_time
# Part of the function call time that was not spent inside Ops.
overhead_time = total_fct_time - local_time
# Guard the ratios below against a division by zero.
if total_fct_time > 0:
time_pr_in_fct = local_time / total_fct_time * 100
overhead_time_pourcent_fct_time = (overhead_time / total_fct_time *
100)
time_per_call = total_fct_time / total_fct_call
else:
time_pr_in_fct = 0
overhead_time_pourcent_fct_time = 0
time_per_call = 0
print()
print('ProfileMode.%s(%s)' % (fct_name, message))
print('---------------------------')
print()
print('Time since import %.3fs' % (total_time))
print('Theano compile time: %.3fs (%.1f%% since import)' %
(compile_time, compile_time / total_time * 100))
print(' Optimization time: %.3fs' % (other_time['optimizer_time']))
print(' Linker time: %.3fs' % (other_time['linker_time']))
print('Theano fct call %.3fs (%.1f%% since import)' %
(total_fct_time, total_fct_time / total_time * 100))
print(' Theano Op time %.3fs %.1f%%(since import) %.1f%%'
'(of fct call)' % (local_time, local_time / total_time * 100,
time_pr_in_fct))
print(' Theano function overhead in ProfileMode %.3fs %.1f%%'
'(since import) %.1f%%(of fct call)' % (
overhead_time, overhead_time / total_time * 100,
overhead_time_pourcent_fct_time))
print('%i Theano fct call, %.3fs per call' %
(total_fct_call, time_per_call))
print('Rest of the time since import %.3fs %.1f%%' %
(unknown_time, unknown_time / total_time * 100))
print()
# One line per compiled function.
print('Theano fct summary:')
print('<% total fct time> <total time> <time per call> <nb call> '
'<fct name>')
for key in fct_call:
if fct_call[key] > 0:
print(' %4.1f%% %.3fs %.2es %d %s' %
(fct_call_time[key] / total_fct_time * 100,
fct_call_time[key],
fct_call_time[key] / fct_call[key],
fct_call[key],
key.name))
else:
print(' NOT CALLED', key.name)
# Compute stats per op.
# op_time: total time, op_call: number of calls, op_apply: number of
# apply nodes, op_cimpl: True iff every apply node ran C code.
# sop_apply counts apply nodes per Op class.
op_time = {}
op_call = {}
op_apply = {}
op_cimpl = {}
sop_apply = {}
for (i, a), t in iteritems(apply_time):
op = a.op
op_time.setdefault(op, 0)
op_call.setdefault(op, 0)
op_apply.setdefault(op, 0)
sop_apply.setdefault(type(a.op), 0)
op_time[op] += t
# Call count of the function whose fgraph contains this apply node.
nb_call = [v for k, v in iteritems(fct_call)
if k.maker.fgraph is a.fgraph][0]
op_cimpl.setdefault(a.op, True)
op_cimpl[a.op] = op_cimpl[a.op] and apply_cimpl.get(a, False)
if t == 0:
assert nb_call == 0, nb_call
else:
op_call[op] += nb_call
op_apply[op] += 1
sop_apply[type(a.op)] += 1
# Compute stats per op class
sop_time = {}
sop_call = {}
sop_op = {}
# map each op class to Bool. True iff all applies were done in c.
sop_cimpl = {}
for a, t in iteritems(op_time):
typ = type(a)
sop_time.setdefault(typ, 0)
sop_time[typ] += t
sop_op.setdefault(typ, 0)
sop_op[typ] += 1
sop_cimpl.setdefault(typ, True)
sop_cimpl[typ] = sop_cimpl[typ] and op_cimpl.get(a, False)
sop_call[typ] = sop_call.get(typ, 0) + op_call[a]
# Print the summary per op class.
print()
print('Single Op-wise summary:')
print('<% of local_time spent on this kind of Op> <cumulative %> '
'<self seconds> <cumulative seconds> <time per call> [*] '
'<nb_call> <nb_op> <nb_apply> <Op name>')
# NOTE(review): divides by local_time; this raises ZeroDivisionError
# when local_time is 0 and sop_time is not empty.
sotimes = [(t * 100 / local_time, t, a, sop_cimpl[a], sop_call[a],
sop_op[a], sop_apply[a]) for a, t in iteritems(sop_time)]
# Largest share of local_time first.
sotimes.sort()
sotimes.reverse()
tot = 0
for f, t, a, ci, nb_call, nb_op, nb_apply in sotimes[:n_ops_to_print]:
if nb_call == 0:
assert t == 0
continue
tot += t
ftot = tot * 100 / local_time
if ci:
msg = '*'
else:
msg = ' '
print(' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %5d %2d '
'%2d %s' % (f, ftot, t, tot, t / nb_call, msg, nb_call,
nb_op, nb_apply, a))
print(' ... (remaining %i single Op account for %.2f%%(%.2fs) of '
'the runtime)' %
(max(0, len(sotimes) - n_ops_to_print),
sum(soinfo[0] for soinfo in sotimes[n_ops_to_print:]),
sum(soinfo[1] for soinfo in sotimes[n_ops_to_print:])))
print('(*) Op is running a c implementation')
# The summary per op
op_flops = {}
for a, t in iteritems(op_time):
if hasattr(a, 'flops'):
# NOTE(review): divides by t; an Op with a flops attribute and a
# total time of 0 raises ZeroDivisionError here.
op_flops[a] = a.flops * op_call[a] / t / 1e6
flops_msg = ''
if op_flops:
flops_msg = ' <MFlops/s>'
print("\nHACK WARNING: we print the flops for some OP, but the "
"logic doesn't always work. You need to know the "
"internals of Theano to make it work correctly. "
"Otherwise don't use it!")
print()
print('Op-wise summary:')
print('<%% of local_time spent on this kind of Op> <cumulative %%> '
'<self seconds> <cumulative seconds> <time per call> [*] %s '
'<nb_call> <nb apply> <Op name>' % (flops_msg))
otimes = [(t * 100 / local_time, t, a, op_cimpl.get(a, 0),
op_call.get(a, 0), op_apply.get(a, 0))
for a, t in iteritems(op_time)]
# Largest share of local_time first.
otimes.sort()
otimes.reverse()
tot = 0
for f, t, a, ci, nb_call, nb_apply in otimes[:n_ops_to_print]:
if nb_call == 0:
assert t == 0
continue
tot += t
ftot = tot * 100 / local_time
if ci:
msg = '*'
else:
msg = ' '
if op_flops:
# -1 is printed in the MFlops column for Ops without flops data.
print(' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %7.1f '
'%5d %2d %s' % (f, ftot, t, tot, t / nb_call, msg,
op_flops.get(a, -1), nb_call, nb_apply,
a))
else:
print(' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %5d %2d '
'%s' % (f, ftot, t, tot, t / nb_call, msg, nb_call,
nb_apply, a))
print(' ... (remaining %i Op account for %6.2f%%(%.2fs) of the '
'runtime)' %
(max(0, len(otimes) - n_ops_to_print),
sum(f for f, t, a, ci, nb_call, nb_op in
otimes[n_ops_to_print:]),
sum(t for f, t, a, ci, nb_call, nb_op in
otimes[n_ops_to_print:])))
print('(*) Op is running a c implementation')
if print_apply:
print()
print('Apply-wise summary:')
# NOTE(review): this header is not %-formatted, so the '%%' below is
# printed literally as two percent signs.
print('<% of local_time spent at this position> <cumulative %%> '
'<apply time> <cumulative seconds> <time per call> [*] '
'<nb_call> <Apply position> <Apply Op name>')
# Here a is the (position, apply node) key of apply_time.
atimes = [(t * 100 / local_time, t, a,
[v for k, v in iteritems(fct_call)
if k.maker.fgraph is a[1].fgraph][0])
for a, t in iteritems(apply_time)]
atimes.sort()
atimes.reverse()
tot = 0
for f, t, a, nb_call in atimes[:n_apply_to_print]:
tot += t
ftot = tot * 100 / local_time
if nb_call == 0:
continue
if apply_cimpl.get(a[1], False):
msg = '*'
else:
msg = ' '
print(' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %i '
'%2i %s' %
(f, ftot, t, tot, t / nb_call, msg, nb_call, a[0],
str(a[1])))
print(' ... (remaining %i Apply instances account for '
'%.2f%%(%.2fs) of the runtime)' %
(max(0, len(atimes) - n_apply_to_print),
sum(f for f, t, a, nb_call in atimes[n_apply_to_print:]),
sum(t for f, t, a, nb_call in atimes[n_apply_to_print:])))
print('(*) Op is running a c implementation')
# Let every printer registered in profiler_printers add its own output.
for printer in profiler_printers:
printer(fct_name, compile_time, fct_call_time, fct_call,
apply_time, apply_cimpl, message, variable_shape,
other_time)
# The memory profile itself is no longer produced; only a notice is
# printed, chosen by the truthiness of variable_shape.
if not variable_shape:
print("\nProfile of Theano intermediate memory disabled. "
"To enable, set the Theano flag ProfileMode.profile_memory "
"to True.")
else:
print("""
The memory profile in ProfileMode is removed!
Use the new profiler. Use the Theano flags
profile=True,profile_memory=True to enable it.""")
print()
print("""Here are tips to potentially make your code run faster
(if you think of new ones, suggest them on the mailing list).
Test them first, as they are not guaranteed to always provide a speedup.""")
# These imports are done locally, inside the function.
from theano import tensor as T
from theano.tensor.raw_random import RandomFunction
import theano
import theano.scalar as scal
# Scalar op classes for which amdlibm is not expected to help.
scalar_op_amdlibm_no_speed_up = [scal.LT, scal.GT, scal.LE, scal.GE,
scal.EQ, scal.NEQ, scal.InRange,
scal.Switch, scal.OR, scal.XOR,
scal.AND, scal.Invert, scal.Maximum,
scal.Minimum, scal.Add, scal.Mul,
scal.Sub, scal.TrueDiv, scal.IntDiv,
scal.Clip, scal.Second, scal.Identity,
scal.Cast, scal.Sgn, scal.Neg,
scal.Inv, scal.Sqr]
# Scalar op classes for which amdlibm is expected to help.
scalar_op_amdlibm_speed_up = [scal.Mod, scal.Pow, scal.Ceil,
scal.Floor, scal.RoundHalfToEven,
scal.RoundHalfAwayFromZero, scal.Log,
scal.Log2, scal.Log10, scal.Log1p,
scal.Exp, scal.Sqrt, scal.Abs, scal.Cos,
scal.Sin, scal.Tan, scal.Tanh,
scal.Cosh, scal.Sinh,
T.nnet.sigm.ScalarSigmoid,
T.nnet.sigm.ScalarSoftplus]
# Recursively flatten a Composite scalar op into the list of scalar ops
# found in its fgraph; any other op is returned as a one-element list.
def get_scalar_ops(s):
if isinstance(s, theano.scalar.Composite):
l = []
for node in s.fgraph.toposort():
l += get_scalar_ops(node.op)
return l
else:
return [s]
# List the scalar ops behind op.scalar_op, expanding a Composite.
def list_scalar_op(op):
if isinstance(op.scalar_op, theano.scalar.Composite):
return get_scalar_ops(op.scalar_op)
else:
return [op.scalar_op]
# Tell whether an Elemwise op uses a scalar op from
# scalar_op_amdlibm_speed_up; scalar ops found in neither list are reported.
def amdlibm_speed_up(op):
if not isinstance(op, T.Elemwise):
return False
else:
l = list_scalar_op(op)
for s_op in l:
if s_op.__class__ in scalar_op_amdlibm_speed_up:
return True
elif s_op.__class__ not in scalar_op_amdlibm_no_speed_up:
print("We don't know if amdlibm will accelerate "
"this scalar op.", s_op)
return False
# Tell whether an Elemwise op contains a scal.Exp scalar op.
def exp_float32_op(op):
if not isinstance(op, T.Elemwise):
return False
else:
l = list_scalar_op(op)
return any([s_op.__class__ in [scal.Exp] for s_op in l])
# Remembers whether a tip was printed, for the fallback message at the end.
printed_tip = False
# tip 1
if config.floatX == 'float64':
print(" - Try the Theano flag floatX=float32")
printed_tip = True
# tip 2
if not config.lib.amdlibm and any([amdlibm_speed_up(a.op) for i, a
in apply_time]):
print(" - Try installing amdlibm and set the Theano flag "
"lib.amdlibm=True. This speeds up only some Elemwise "
"operation.")
printed_tip = True
# tip 3
if not config.lib.amdlibm and any([exp_float32_op(a.op) and
a.inputs[0].dtype == 'float32'
for i, a in apply_time]):
print(" - With the default gcc libm, exp in float32 is slower "
"than in float64! Try Theano flag floatX=float64, or "
"install amdlibm and set the theano flags lib.amdlibm=True")
printed_tip = True
# tip 4
for a, t in iteritems(apply_time):
node = a[1]
if (isinstance(node.op, T.Dot) and
all([len(i.type.broadcastable) == 2
for i in node.inputs])):
print(" - You have a dot operation that was not optimized to"
" dot22 (which is faster). Make sure the inputs are "
"float32 or float64, and are the same for both inputs. "
"Currently they are: %s" %
[i.type for i in node.inputs])
printed_tip = True
# tip 5
for a, t in iteritems(apply_time):
node = a[1]
if isinstance(node.op, RandomFunction):
printed_tip = True
print(" - Replace the default random number generator by "
"'from theano.sandbox.rng_mrg import MRG_RandomStreams "
"as RandomStreams', as this is is faster. It is still "
"experimental, but seems to work correctly.")
if config.device.startswith("gpu"):
print(" - MRG_RandomStreams is the only random number"
" generator supported on the GPU.")
break
# tip 6
import theano.sandbox.cuda as cuda
from theano.tensor.nnet import LogSoftmax
import theano.tensor.signal.pool as pool
import theano.gpuarray
# NOTE(review): the two messages below are built from adjacent string
# literals without a separating space ("faster" + "this"), and they do not
# set printed_tip.
for a, t in iteritems(apply_time):
node = a[1]
if (isinstance(node.op, pool.Pool)):
if (not cuda.dnn.dnn_available() and not theano.gpuarray.dnn.dnn_present()):
print("Install CuDNN to do pooling faster"
"this allows the operation to run on GPU")
if (isinstance(node.op, LogSoftmax)):
if (not cuda.dnn.dnn_available() and not theano.gpuarray.dnn.dnn_present()):
print("Install CuDNN to do LogSoftmax faster"
"this allows the operation to run on GPU")
if not printed_tip:
print(" Sorry, no tip for today.")
def clone(self, link_kwargs=None, optimizer="", message=None):
    """
    Create a new instance of this Mode.

    Parameters
    ----------
    link_kwargs
        Optional dict of keyword arguments forwarded to the `clone`
        method of the linker. None (the default) forwards no argument.
    optimizer
        Optimizer of the new mode. The default "" means that
        `self.provided_optimizer` is reused.
    message
        If truthy, it is stored as the `message` attribute of the new
        mode.

    Returns
    -------
    A new instance of `type(self)` built from the cloned linker and the
    selected optimizer.

    """
    # `**None` raises a TypeError, so the documented default must be
    # replaced by an empty mapping before it is expanded.
    if link_kwargs is None:
        link_kwargs = {}
    new_linker = self.linker.clone(**link_kwargs)

    new_optimizer = optimizer
    if optimizer == "":
        new_optimizer = self.provided_optimizer
    new_mode = type(self)(linker=new_linker,
                          optimizer=new_optimizer)
    # If self is in the list of profiles to print, then add the
    # new one as well
    if self in prof_mode_instance_to_print:
        prof_mode_instance_to_print.append(new_mode)

    if message:
        new_mode.message = message

    return new_mode
# Register a default-constructed ProfileMode under the name 'PROFILE_MODE'.
register_mode('PROFILE_MODE', ProfileMode())
# Modes whose summary is printed automatically at exit. The list starts
# with the predefined 'PROFILE_MODE' entry (presumably the instance
# registered just above -- confirm against register_mode); clone() appends
# the clones of the modes that are already listed.
prof_mode_instance_to_print = [predefined_modes["PROFILE_MODE"]]
def atexit_print_default_profile_mode():
    """
    Print the summary of every listed profile mode that was used.

    A mode counts as used when its `local_time` is greater than 0. This
    makes the summary appear at exit when config.mode=ProfileMode.

    """
    # The generator keeps the check lazy: each mode is tested right before
    # its own summary is printed.
    used_modes = (candidate for candidate in prof_mode_instance_to_print
                  if candidate.local_time > 0)
    for used_mode in used_modes:
        used_mode.print_summary()
# Register atexit_print_default_profile_mode so that the summary of the
# listed profile modes that were used is printed when the program terminates.
atexit.register(atexit_print_default_profile_mode)
# Hook that allows printing extra profiling information: every callable in
# this list is called by print_summary_ with the profiling data.
profiler_printers = []
def register_profiler_printer(fct):
    """
    Add `fct` to the list of extra profiler printers.

    `fct` is returned unchanged, so this function can be used as a
    decorator.

    """
    registry = profiler_printers
    registry.append(fct)
    return fct
...@@ -3,8 +3,6 @@ ProfileStats object for runtime and memory profiling. ...@@ -3,8 +3,6 @@ ProfileStats object for runtime and memory profiling.
""" """
# #
# TODO: measure memory usage like ProfileMode did
# TODO: put the optimization tips into a tips section??
# TODO: add tip to use specify_shape (is specify_shape even in library doc?) # TODO: add tip to use specify_shape (is specify_shape even in library doc?)
# TODO: ensure field width for string fields makes columns line up # TODO: ensure field width for string fields makes columns line up
# TODO: what to do about 'diff summary'? (ask Fred?) # TODO: what to do about 'diff summary'? (ask Fred?)
...@@ -378,7 +376,7 @@ class ProfileStats(object): ...@@ -378,7 +376,7 @@ class ProfileStats(object):
else: else:
local_time = 0 local_time = 0
if local_time == 0: if local_time == 0:
print(('ProfileMode.summary_class: total time 0' print(('ProfileStats.summary_class: total time 0'
' (did you forget to enable counters?)'), file=file) ' (did you forget to enable counters?)'), file=file)
return return
class_time = self.class_time() class_time = self.class_time()
...@@ -462,7 +460,7 @@ class ProfileStats(object): ...@@ -462,7 +460,7 @@ class ProfileStats(object):
else: else:
local_time = 0 local_time = 0
if local_time == 0: if local_time == 0:
print(('ProfileMode.summary_ops: total time 0' print(('ProfileStats.summary_ops: total time 0'
' (did you forget to enable counters?)'), file=file) ' (did you forget to enable counters?)'), file=file)
return return
op_time = self.op_time() op_time = self.op_time()
...@@ -540,7 +538,7 @@ class ProfileStats(object): ...@@ -540,7 +538,7 @@ class ProfileStats(object):
else: else:
local_time = 0 local_time = 0
if local_time == 0: if local_time == 0:
print(('ProfileMode.summary_nodes: total time 0' print(('ProfileStats.summary_nodes: total time 0'
' (did you forget to enable counters?)'), file=file) ' (did you forget to enable counters?)'), file=file)
return return
......
...@@ -7,7 +7,7 @@ import unittest ...@@ -7,7 +7,7 @@ import unittest
import theano import theano
import theano.tensor as T import theano.tensor as T
from theano.compile import Mode, ProfileMode from theano.compile import Mode
class T_bunch_of_modes(unittest.TestCase): class T_bunch_of_modes(unittest.TestCase):
...@@ -18,9 +18,6 @@ class T_bunch_of_modes(unittest.TestCase): ...@@ -18,9 +18,6 @@ class T_bunch_of_modes(unittest.TestCase):
linker_classes_involved = [] linker_classes_involved = []
predef_modes = ['FAST_COMPILE', 'FAST_RUN', 'DEBUG_MODE'] predef_modes = ['FAST_COMPILE', 'FAST_RUN', 'DEBUG_MODE']
# Use a new instance of ProfileMode instead of 'ProfileMode' to
# avoid printing a profile mode summary in nose output
predef_modes.append(ProfileMode())
# Linkers to use with regular Mode # Linkers to use with regular Mode
if theano.config.cxx: if theano.config.cxx:
...@@ -43,20 +40,13 @@ class T_bunch_of_modes(unittest.TestCase): ...@@ -43,20 +40,13 @@ class T_bunch_of_modes(unittest.TestCase):
# there should be # there should be
# - VM_Linker # - VM_Linker
# - OpWiseCLinker (FAST_RUN) # - OpWiseCLinker (FAST_RUN)
# - WrapLinker ("ProfileMode")
# - PerformLinker (FAST_COMPILE) # - PerformLinker (FAST_COMPILE)
# - DebugMode's Linker (DEBUG_MODE) # - DebugMode's Linker (DEBUG_MODE)
assert 5 == len(set(linker_classes_involved)) assert 4 == len(set(linker_classes_involved))
class T_ProfileMode_WrapLinker(unittest.TestCase): class T_old_problem(unittest.TestCase):
def test_1(self): def test_1(self):
# First, compile a function with a new ProfileMode() object
# No need to call that function
x = T.matrix()
mode = ProfileMode()
theano.function([x], x * 2, mode=mode)
# Then, build a mode with the same linker, and a modified optimizer # Then, build a mode with the same linker, and a modified optimizer
default_mode = theano.compile.mode.get_default_mode() default_mode = theano.compile.mode.get_default_mode()
modified_mode = default_mode.including('specialize') modified_mode = default_mode.including('specialize')
......
...@@ -405,9 +405,9 @@ AddConfigVar( ...@@ -405,9 +405,9 @@ AddConfigVar(
AddConfigVar( AddConfigVar(
'mode', 'mode',
"Default compilation mode", "Default compilation mode",
EnumStr('Mode', 'ProfileMode', 'DebugMode', 'FAST_RUN', EnumStr('Mode', 'DebugMode', 'FAST_RUN',
'NanGuardMode', 'NanGuardMode',
'FAST_COMPILE', 'PROFILE_MODE', 'DEBUG_MODE'), 'FAST_COMPILE', 'DEBUG_MODE'),
in_c_key=False) in_c_key=False)
param = "g++" param = "g++"
...@@ -463,8 +463,7 @@ del param ...@@ -463,8 +463,7 @@ del param
if rc == 0 and config.cxx != "": if rc == 0 and config.cxx != "":
# Keep the default linker the same as the one for the mode FAST_RUN # Keep the default linker the same as the one for the mode FAST_RUN
AddConfigVar('linker', AddConfigVar('linker',
("Default linker used if the theano flags mode is Mode " "Default linker used if the theano flags mode is Mode",
"or ProfileMode(deprecated)"),
EnumStr('cvm', 'c|py', 'py', 'c', 'c|py_nogc', EnumStr('cvm', 'c|py', 'py', 'c', 'c|py_nogc',
'vm', 'vm_nogc', 'cvm_nogc'), 'vm', 'vm_nogc', 'cvm_nogc'),
in_c_key=False) in_c_key=False)
...@@ -472,8 +471,7 @@ else: ...@@ -472,8 +471,7 @@ else:
# g++ is not present or the user disabled it, # g++ is not present or the user disabled it,
# linker should default to python only. # linker should default to python only.
AddConfigVar('linker', AddConfigVar('linker',
("Default linker used if the theano flags mode is Mode " "Default linker used if the theano flags mode is Mode",
"or ProfileMode(deprecated)"),
EnumStr('vm', 'py', 'vm_nogc'), EnumStr('vm', 'py', 'vm_nogc'),
in_c_key=False) in_c_key=False)
try: try:
...@@ -501,8 +499,7 @@ AddConfigVar('allow_gc', ...@@ -501,8 +499,7 @@ AddConfigVar('allow_gc',
# Keep the default optimizer the same as the one for the mode FAST_RUN # Keep the default optimizer the same as the one for the mode FAST_RUN
AddConfigVar( AddConfigVar(
'optimizer', 'optimizer',
("Default optimizer. If not None, will use this linker with the Mode " "Default optimizer. If not None, will use this optimizer with the Mode",
"object (not ProfileMode(deprecated) or DebugMode)"),
EnumStr('fast_run', 'merge', 'fast_compile', 'None'), EnumStr('fast_run', 'merge', 'fast_compile', 'None'),
in_c_key=False) in_c_key=False)
...@@ -951,27 +948,6 @@ AddConfigVar('NanGuardMode.action', ...@@ -951,27 +948,6 @@ AddConfigVar('NanGuardMode.action',
EnumStr('raise', 'warn', 'pdb'), EnumStr('raise', 'warn', 'pdb'),
in_c_key=False) in_c_key=False)
AddConfigVar('ProfileMode.n_apply_to_print',
"Number of apply instances to print by default",
IntParam(15, lambda i: i > 0),
in_c_key=False)
AddConfigVar('ProfileMode.n_ops_to_print',
"Number of ops to print by default",
IntParam(20, lambda i: i > 0),
in_c_key=False)
AddConfigVar('ProfileMode.min_memory_size',
"For the memory profile, do not print apply nodes if the size "
"of their outputs (in bytes) is lower then this threshold",
IntParam(1024, lambda i: i >= 0),
in_c_key=False)
AddConfigVar('ProfileMode.profile_memory',
"""Enable profiling of memory used by Theano functions""",
BoolParam(False),
in_c_key=False)
AddConfigVar('optimizer_excluding', AddConfigVar('optimizer_excluding',
("When using the default mode, we will remove optimizer with " ("When using the default mode, we will remove optimizer with "
"these tags. Separate tags with ':'."), "these tags. Separate tags with ':'."),
......
...@@ -11,7 +11,6 @@ from six import iteritems, itervalues ...@@ -11,7 +11,6 @@ from six import iteritems, itervalues
import theano import theano
from theano import gof from theano import gof
from theano.compile.profilemode import ProfileMode
from theano.compile import Function from theano.compile import Function
from theano.compile import builders from theano.compile import builders
from theano.printing import pydot_imported, pydot_imported_msg from theano.printing import pydot_imported, pydot_imported_msg
...@@ -123,13 +122,6 @@ class PyDotFormatter(object): ...@@ -123,13 +122,6 @@ class PyDotFormatter(object):
profile = None profile = None
if isinstance(fct, Function): if isinstance(fct, Function):
mode = fct.maker.mode
if (not isinstance(mode, ProfileMode) or
fct not in mode.profile_stats):
mode = None
if mode:
profile = mode.profile_stats[fct]
else:
profile = getattr(fct, "profile", None) profile = getattr(fct, "profile", None)
outputs = fct.maker.fgraph.outputs outputs = fct.maker.fgraph.outputs
topo = fct.maker.fgraph.toposort() topo = fct.maker.fgraph.toposort()
......
...@@ -20,7 +20,6 @@ from theano import gof ...@@ -20,7 +20,6 @@ from theano import gof
from theano import config from theano import config
from theano.gof import Op, Apply from theano.gof import Op, Apply
from theano.compile import Function, debugmode, SharedVariable from theano.compile import Function, debugmode, SharedVariable
from theano.compile.profilemode import ProfileMode
pydot_imported = False pydot_imported = False
pydot_imported_msg = "" pydot_imported_msg = ""
...@@ -759,15 +758,10 @@ def pydotprint(fct, outfile=None, ...@@ -759,15 +758,10 @@ def pydotprint(fct, outfile=None,
config.device + '.' + format) config.device + '.' + format)
if isinstance(fct, Function): if isinstance(fct, Function):
mode = fct.maker.mode
profile = getattr(fct, "profile", None) profile = getattr(fct, "profile", None)
if (not isinstance(mode, ProfileMode) or
fct not in mode.profile_stats):
mode = None
outputs = fct.maker.fgraph.outputs outputs = fct.maker.fgraph.outputs
topo = fct.maker.fgraph.toposort() topo = fct.maker.fgraph.toposort()
elif isinstance(fct, gof.FunctionGraph): elif isinstance(fct, gof.FunctionGraph):
mode = None
profile = None profile = None
outputs = fct.outputs outputs = fct.outputs
topo = fct.toposort() topo = fct.toposort()
...@@ -780,7 +774,6 @@ def pydotprint(fct, outfile=None, ...@@ -780,7 +774,6 @@ def pydotprint(fct, outfile=None,
assert all(isinstance(v, gof.Variable) for v in fct) assert all(isinstance(v, gof.Variable) for v in fct)
fct = gof.FunctionGraph(inputs=gof.graph.inputs(fct), fct = gof.FunctionGraph(inputs=gof.graph.inputs(fct),
outputs=fct) outputs=fct)
mode = None
profile = None profile = None
outputs = fct.outputs outputs = fct.outputs
topo = fct.toposort() topo = fct.toposort()
...@@ -868,19 +861,7 @@ def pydotprint(fct, outfile=None, ...@@ -868,19 +861,7 @@ def pydotprint(fct, outfile=None,
if node in apply_name_cache: if node in apply_name_cache:
return apply_name_cache[node], apply_name_id[node] return apply_name_cache[node], apply_name_id[node]
prof_str = '' prof_str = ''
if mode: if profile:
time = mode.profile_stats[fct].apply_time.get(node, 0)
# second, % total time in profiler, %fct time in profiler
if mode.local_time == 0:
pt = 0
else:
pt = time * 100 / mode.local_time
if mode.profile_stats[fct].fct_callcount == 0:
pf = 0
else:
pf = time * 100 / mode.profile_stats[fct].fct_call_time
prof_str = ' (%.3fs,%.3f%%,%.3f%%)' % (time, pt, pf)
elif profile:
time = profile.apply_time.get(node, 0) time = profile.apply_time.get(node, 0)
# second, %fct time in profiler # second, %fct time in profiler
if profile.fct_callcount == 0: if profile.fct_callcount == 0:
......
...@@ -4092,7 +4092,8 @@ def tensor4(name=None, dtype=None): ...@@ -4092,7 +4092,8 @@ def tensor4(name=None, dtype=None):
ftensor4 = CudaNdarrayType(dtype='float32', broadcastable=(False,) * 4) ftensor4 = CudaNdarrayType(dtype='float32', broadcastable=(False,) * 4)
@theano.compile.profilemode.register_profiler_printer # TODO: move that to the new back-end and new profiling.py print_tips
# @theano.compile.profilemode.register_profiler_printer
def profile_printer(fct_name, compile_time, fct_call_time, fct_call, def profile_printer(fct_name, compile_time, fct_call_time, fct_call,
apply_time, apply_cimpl, message, outputs_size, apply_time, apply_cimpl, message, outputs_size,
other_time): other_time):
......
...@@ -907,7 +907,7 @@ class BaseGpuCorrMM(GpuOp): ...@@ -907,7 +907,7 @@ class BaseGpuCorrMM(GpuOp):
def flops(self, inp, outp): def flops(self, inp, outp):
""" """
Useful with the hack in profilemode to print the MFlops. Useful with the hack in profiling to print the MFlops.
""" """
# if the output shape is correct, then this gives the correct # if the output shape is correct, then this gives the correct
...@@ -1421,7 +1421,7 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1421,7 +1421,7 @@ class BaseGpuCorr3dMM(GpuOp):
self.pad) self.pad)
def flops(self, inp, outp): def flops(self, inp, outp):
""" Useful with the hack in profilemode to print the MFlops""" """ Useful with the hack in profiling to print the MFlops"""
# if the output shape is correct, then this gives the correct # if the output shape is correct, then this gives the correct
# flops for any direction, sampling, padding, and border mode # flops for any direction, sampling, padding, and border mode
inputs, filters = inp inputs, filters = inp
...@@ -2101,7 +2101,7 @@ class GpuConv(GpuOp): ...@@ -2101,7 +2101,7 @@ class GpuConv(GpuOp):
return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()]) return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()])
def flops(self, inputs, outputs): def flops(self, inputs, outputs):
""" Useful with the hack in profilemode to print the MFlops""" """ Useful with the hack in profiling to print the MFlops"""
images, kerns = inputs images, kerns = inputs
out, = outputs out, = outputs
assert images[1] == kerns[1] assert images[1] == kerns[1]
......
...@@ -1367,12 +1367,12 @@ def speed_adv_sub1(): ...@@ -1367,12 +1367,12 @@ def speed_adv_sub1():
vec = tensor.lvector() vec = tensor.lvector()
for batch_size in [100, 1000, 10000, 100000]: for batch_size in [100, 1000, 10000, 100000]:
idx = numpy.random.randint(0, 50000, batch_size) idx = numpy.random.randint(0, 50000, batch_size)
mode_with_gpu = theano.compile.ProfileMode().including('gpu') mode_with_gpu = theano.compile.get_default_mode().including('gpu')
f = theano.function([vec], var[vec], mode=mode_with_gpu) f = theano.function([vec], var[vec], mode=mode_with_gpu, profile=True)
for i in range(100): for i in range(100):
f(idx) f(idx)
print("ProfileMode with batch size", batch_size) print("profile with batch size", batch_size)
mode_with_gpu.print_summary() mode_with_gpu.summary()
def speed_reduce10(): def speed_reduce10():
......
...@@ -19,7 +19,7 @@ import theano.sandbox.cuda as tcn ...@@ -19,7 +19,7 @@ import theano.sandbox.cuda as tcn
import theano.tests.unittest_tools as utt import theano.tests.unittest_tools as utt
if theano.config.mode not in ['FAST_RUN', 'Mode', 'ProfileMode']: if theano.config.mode not in ['FAST_RUN', 'Mode']:
raise SkipTest('Skip test_mlp when not in normal optimization mode as ' raise SkipTest('Skip test_mlp when not in normal optimization mode as '
'otherwise it is too slow!') 'otherwise it is too slow!')
...@@ -48,8 +48,6 @@ def get_mode(use_gpu, check_isfinite=True): ...@@ -48,8 +48,6 @@ def get_mode(use_gpu, check_isfinite=True):
ret = theano.compile.get_default_mode() ret = theano.compile.get_default_mode()
else: else:
ret = theano.compile.mode.get_mode('FAST_RUN') ret = theano.compile.mode.get_mode('FAST_RUN')
if isinstance(ret, theano.compile.ProfileMode):
ret = copy.copy(ret)
if isinstance(ret, theano.compile.DebugMode): if isinstance(ret, theano.compile.DebugMode):
ret = copy.copy(ret) ret = copy.copy(ret)
ret.check_isfinite = check_isfinite ret.check_isfinite = check_isfinite
...@@ -60,19 +58,6 @@ def get_mode(use_gpu, check_isfinite=True): ...@@ -60,19 +58,6 @@ def get_mode(use_gpu, check_isfinite=True):
return ret return ret
def print_mode(mode):
if mode is not None and isinstance(mode, (theano.compile.ProfileMode,)):
mode.print_summary()
def print_diff_mode(a, b):
if (a is not None and
isinstance(a, (theano.compile.ProfileMode,)) and
isinstance(b, (theano.compile.ProfileMode,))):
a.print_diff_summary(b)
def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10, def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10,
n_train=100): n_train=100):
...@@ -123,7 +108,6 @@ def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10, ...@@ -123,7 +108,6 @@ def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10,
rval.append(train(xval, yval, lr)) rval.append(train(xval, yval, lr))
dt = time.time() - t0 dt = time.time() - t0
print_mode(mode)
return numpy.asarray(rval), dt return numpy.asarray(rval), dt
...@@ -220,7 +204,6 @@ def run_conv_nnet1(use_gpu): ...@@ -220,7 +204,6 @@ def run_conv_nnet1(use_gpu):
for i in xrange(n_train): for i in xrange(n_train):
rval = train(xval, yval, lr) rval = train(xval, yval, lr)
# print 'training done' # print 'training done'
print_mode(mode)
return rval return rval
...@@ -316,7 +299,6 @@ def run_conv_nnet2(use_gpu): # pretend we are training LeNet for MNIST ...@@ -316,7 +299,6 @@ def run_conv_nnet2(use_gpu): # pretend we are training LeNet for MNIST
for i in xrange(n_train): for i in xrange(n_train):
rval = train(xval, yval, lr) rval = train(xval, yval, lr)
print_mode(mode)
return rval return rval
...@@ -428,7 +410,6 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch, ...@@ -428,7 +410,6 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch,
def run_conv_nnet2_classif(use_gpu, seed, isize, ksize, bsize, def run_conv_nnet2_classif(use_gpu, seed, isize, ksize, bsize,
n_train=10, n_train=10,
check_isfinite=True, check_isfinite=True,
pickle=False,
verbose=0, verbose=0,
version=-1): version=-1):
"""Run the train function returned by build_conv_nnet2_classif on one device. """Run the train function returned by build_conv_nnet2_classif on one device.
...@@ -444,11 +425,6 @@ def run_conv_nnet2_classif(use_gpu, seed, isize, ksize, bsize, ...@@ -444,11 +425,6 @@ def run_conv_nnet2_classif(use_gpu, seed, isize, ksize, bsize,
version=version, version=version,
check_isfinite=check_isfinite) check_isfinite=check_isfinite)
if use_gpu:
device = 'GPU'
else:
device = 'CPU'
xval = my_rand(*x_shape) xval = my_rand(*x_shape)
yval = my_rand(*y_shape) yval = my_rand(*y_shape)
lr = theano._asarray(0.01, dtype='float32') lr = theano._asarray(0.01, dtype='float32')
...@@ -456,17 +432,6 @@ def run_conv_nnet2_classif(use_gpu, seed, isize, ksize, bsize, ...@@ -456,17 +432,6 @@ def run_conv_nnet2_classif(use_gpu, seed, isize, ksize, bsize,
rvals = my_zeros(n_train) rvals = my_zeros(n_train)
for i in xrange(n_train): for i in xrange(n_train):
rvals[i] = train(xval, yval, lr)[0] rvals[i] = train(xval, yval, lr)[0]
print_mode(mode)
if pickle and isinstance(mode, theano.compile.ProfileMode):
import pickle
print("BEGIN %s profile mode dump" % device)
print(pickle.dumps(mode))
print("END %s profile mode dump" % device)
# print "%s time: %.3f" % (device, t1-t0)
# print "estimated time for one pass through MNIST with %s: %f" % (
# device, (t1-t0) * (60000.0 / (n_train*bsize)))
def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize, def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
...@@ -476,7 +441,6 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize, ...@@ -476,7 +441,6 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
cpu_only=False, cpu_only=False,
float_atol=1e-06, float_atol=1e-06,
check_isfinite=True, check_isfinite=True,
pickle=False,
verbose=0, verbose=0,
version=-1): version=-1):
"""Run the nnet2 function on 1 or 2 devices, and compares the results. """Run the nnet2 function on 1 or 2 devices, and compares the results.
...@@ -512,7 +476,6 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize, ...@@ -512,7 +476,6 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
seed=seed, isize=isize, ksize=ksize, bsize=bsize, seed=seed, isize=isize, ksize=ksize, bsize=bsize,
n_train=n_train, n_train=n_train,
check_isfinite=check_isfinite, check_isfinite=check_isfinite,
pickle=pickle,
verbose=verbose, verbose=verbose,
version=version) version=version)
......
...@@ -175,7 +175,7 @@ class Scan(PureOp): ...@@ -175,7 +175,7 @@ class Scan(PureOp):
mode_instance = compile.mode.get_mode(self.mode) mode_instance = compile.mode.get_mode(self.mode)
# Clone mode_instance, altering "allow_gc" for the linker, # Clone mode_instance, altering "allow_gc" for the linker,
# and adding a message if the mode is a ProfileMode. # and adding a message if we profile
if self.name: if self.name:
message = self.name + " sub profile" message = self.name + " sub profile"
else: else:
...@@ -1564,14 +1564,6 @@ class Scan(PureOp): ...@@ -1564,14 +1564,6 @@ class Scan(PureOp):
if hasattr(self.fn.fn, 'update_profile'): if hasattr(self.fn.fn, 'update_profile'):
self.fn.fn.update_profile(profile) self.fn.fn.update_profile(profile)
#/* Old ProfileMode
# if hasattr(self.fn.maker.mode,'fct_call_time'):
# self.fn.maker.mode.fct_call_time[self.fn] += t_fn
# self.fn.maker.mode.fct_call[self.fn] += n_steps
#self.fn.maker.mode.call_time += t_fn
#self.fn.maker.mode.fn_time += t_fn
# Old Profile Mode */
self.t_call = t_call self.t_call = t_call
self.t_fn = t_fn self.t_fn = t_fn
...@@ -2839,7 +2831,8 @@ class Scan(PureOp): ...@@ -2839,7 +2831,8 @@ class Scan(PureOp):
gof.ops_with_inner_function[Scan] = 'fn' gof.ops_with_inner_function[Scan] = 'fn'
@theano.compile.profilemode.register_profiler_printer # TODO: move that to the new back-end and new profiling.py print_tips
#@theano.compile.profilemode.register_profiler_printer
def profile_printer(fct_name, compile_time, fct_call_time, fct_call, def profile_printer(fct_name, compile_time, fct_call_time, fct_call,
apply_time, apply_cimpl, message, outputs_size, apply_time, apply_cimpl, message, outputs_size,
other_time): other_time):
......
...@@ -47,7 +47,7 @@ class TestSP(unittest.TestCase): ...@@ -47,7 +47,7 @@ class TestSP(unittest.TestCase):
filters = rng.randn(nkern, numpy.prod(kshp)) filters = rng.randn(nkern, numpy.prod(kshp))
biasvals = rng.randn(nkern) biasvals = rng.randn(nkern)
for mode in ('FAST_COMPILE', 'FAST_RUN'): # , profmode): for mode in ('FAST_COMPILE', 'FAST_RUN'):
ttot, ntot = 0, 0 ttot, ntot = 0, 0
for conv_mode in convmodes: for conv_mode in convmodes:
for ss in ssizes: for ss in ssizes:
...@@ -128,7 +128,6 @@ class TestSP(unittest.TestCase): ...@@ -128,7 +128,6 @@ class TestSP(unittest.TestCase):
# print 'Numpy processing time: ', ntot # print 'Numpy processing time: ', ntot
# print 'Theano processing time: ', ttot # print 'Theano processing time: ', ttot
# profmode.print_summary()
# this doesn't compare the output of anything... but I manually verified that the patches # this doesn't compare the output of anything... but I manually verified that the patches
# are properly generated # are properly generated
......
...@@ -719,7 +719,7 @@ class BaseAbstractConv2d(Op): ...@@ -719,7 +719,7 @@ class BaseAbstractConv2d(Op):
self.filter_dilation = tuple(filter_dilation) self.filter_dilation = tuple(filter_dilation)
def flops(self, inp, outp): def flops(self, inp, outp):
""" Useful with the hack in profilemode to print the MFlops""" """ Useful with the hack in profiling to print the MFlops"""
# if the output shape is correct, then this gives the correct # if the output shape is correct, then this gives the correct
# flops for any direction, sampling, padding, and border mode # flops for any direction, sampling, padding, and border mode
inputs, filters = inp inputs, filters = inp
......
...@@ -609,7 +609,7 @@ class ConvOp(OpenMPOp): ...@@ -609,7 +609,7 @@ class ConvOp(OpenMPOp):
def flops(self, inputs, outputs): def flops(self, inputs, outputs):
""" """
Useful with the hack in profilemode to print the MFlops. Useful with the hack in profiling to print the MFlops.
""" """
images, kerns = inputs images, kerns = inputs
......
...@@ -1394,11 +1394,7 @@ class test_fusion(unittest.TestCase): ...@@ -1394,11 +1394,7 @@ class test_fusion(unittest.TestCase):
def speed_log_exp(self): def speed_log_exp(self):
s = slice(31, 36) s = slice(31, 36)
# linker=gof.CLinker print("time", self.do(None, shared, shp=(1000, 1000), gpu=False,
linker = gof.OpWiseCLinker
mode = compile.Mode(linker(), copy.copy(compile.mode.OPT_FAST_RUN))
mode = compile.ProfileMode()
print("time", self.do(mode, shared, shp=(1000, 1000), gpu=False,
assert_len_topo=False, slice=s, nb_repeat=100)) assert_len_topo=False, slice=s, nb_repeat=100))
def tes_memory_leak(self, mode=compile.mode.Mode('c', 'merge'), def tes_memory_leak(self, mode=compile.mode.Mode('c', 'merge'),
......
...@@ -115,14 +115,17 @@ def test_pydotprint_long_name(): ...@@ -115,14 +115,17 @@ def test_pydotprint_long_name():
def test_pydotprint_profile(): def test_pydotprint_profile():
"""Just check that pydotprint does not crash with ProfileMode.""" """Just check that pydotprint does not crash with profile."""
# Skip test if pydot is not available. # Skip test if pydot is not available.
if not theano.printing.pydot_imported: if not theano.printing.pydot_imported:
raise SkipTest('pydot not available') raise SkipTest('pydot not available')
A = tensor.matrix() A = tensor.matrix()
f = theano.function([A], A + 1, mode='ProfileMode') prof = theano.compile.ProfileStats(atexit_print=False)
f = theano.function([A], A + 1, profile=prof)
theano.printing.pydotprint(f, print_output_file=False)
f([[1]])
theano.printing.pydotprint(f, print_output_file=False) theano.printing.pydotprint(f, print_output_file=False)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论