提交 690d3628 authored 作者: abergeron's avatar abergeron

Merge pull request #3301 from harlouci/numpydoc_compile

Numpydoc compile
...@@ -10,10 +10,11 @@ from functools import reduce ...@@ -10,10 +10,11 @@ from functools import reduce
class OpFromGraph(gof.Op): class OpFromGraph(gof.Op):
"""This creates an `Op` from inputs and outputs lists of variables. """
This creates an `Op` from inputs and outputs lists of variables.
The signature is similar to theano.function() and the resulting The signature is similar to theano.function() and the resulting
`Op`'s perform will do the same operation as:: `Op`'s perform will do the same operation as:
orig_function(inputs, outputs, **kwargs) orig_function(inputs, outputs, **kwargs)
...@@ -31,11 +32,15 @@ class OpFromGraph(gof.Op): ...@@ -31,11 +32,15 @@ class OpFromGraph(gof.Op):
- Add support to pickle this Op. - Add support to pickle this Op.
- Add support/test with random generator - Add support/test with random generator
:note: Notes
- We support shared variables in the inner graph. This is automatic and -----
invisible to the user. They can be as input to the node or in the - We support shared variables in the inner graph. This is automatic and
inner graph. invisible to the user. They can be as input to the node or in the
- We support unused inputs. This is needed for the grad. inner graph.
- We support unused inputs. This is needed for the grad.
Examples
--------
Example 1: Example 1:
...@@ -49,8 +54,6 @@ class OpFromGraph(gof.Op): ...@@ -49,8 +54,6 @@ class OpFromGraph(gof.Op):
e2 = op(x, y, z) + op(z, y, x) e2 = op(x, y, z) + op(z, y, x)
fn = function([x, y, z], [e2]) fn = function([x, y, z], [e2])
Example 2 with shared variable: Example 2 with shared variable:
.. code-block:: python .. code-block:: python
...@@ -139,7 +142,8 @@ class OpFromGraph(gof.Op): ...@@ -139,7 +142,8 @@ class OpFromGraph(gof.Op):
def connection_pattern(self, node): def connection_pattern(self, node):
""" """
Return connection pattern of subfgraph defined by inputs and outputs Return connection pattern of subfgraph defined by inputs and outputs.
""" """
return io_connection_pattern(self.new_inputs, self.new_outputs) return io_connection_pattern(self.new_inputs, self.new_outputs)
......
"""Define the `function` function """
Define the `function` function.
""" """
import six.moves.cPickle as pickle import six.moves.cPickle as pickle
import logging import logging
...@@ -23,8 +25,9 @@ def function_dump(filename, inputs, outputs=None, mode=None, updates=None, ...@@ -23,8 +25,9 @@ def function_dump(filename, inputs, outputs=None, mode=None, updates=None,
no_default_updates=False, accept_inplace=False, name=None, no_default_updates=False, accept_inplace=False, name=None,
rebuild_strict=True, allow_input_downcast=None, profile=None, rebuild_strict=True, allow_input_downcast=None, profile=None,
on_unused_input=None): on_unused_input=None):
"""This is helpful to make a reproducible case for a problem during """
Theano compilation. This is helpful to make a reproducible case for a problem during Theano
compilation.
Ex: Ex:
...@@ -65,78 +68,67 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None, ...@@ -65,78 +68,67 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
""" """
Return a callable object that will calculate `outputs` from `inputs`. Return a callable object that will calculate `outputs` from `inputs`.
:type inputs: list of either Variable or Param instances. Parameters
:param inputs: function parameters, these are not allowed to be shared ----------
variables inputs : list of either Variable or Param instances.
Function parameters, these are not allowed to be shared variables.
:type outputs: list or dict of Variables or Out instances. If it is a outputs : list or dict of Variables or Out instances.
dict, the keys must be strings If it is a dict, the keys must be strings. Expressions to compute.
:param outputs: expressions to compute mode : string or `Mode` instance.
Compilation mode.
:type mode: string or `Mode` instance. updates : iterable over pairs (shared_variable, new_expression). List, tuple
:param mode: compilation mode or OrderedDict.
Updates the values for SharedVariable inputs according to these
:type updates: iterable over pairs (shared_variable, new_expression). expressions.
List, tuple or OrderedDict. givens : iterable over pairs (Var1, Var2) of Variables. List, tuple or dict.
:param updates: update the values for SharedVariable inputs The Var1 and Var2 in each pair must have the same Type.
according to these expressions Specific substitutions to make in the computation graph (Var2 replaces
Var1).
:type givens: iterable over pairs (Var1, Var2) of Variables. List, no_default_updates : either bool or list of Variables
tuple or dict. The Var1 and Var2 in each pair must If True, do not perform any automatic update on Variables. If False
have the same Type. (default), perform them all. Else, perform automatic updates on all
:param givens: specific substitutions to make in the computation Variables that are neither in "updates" nor in "no_default_updates".
graph (Var2 replaces Var1). name : str
An optional name for this function. The profile mode will print the time
:type no_default_updates: either bool or list of Variables spent in this function.
:param no_default_updates: if True, do not perform any automatic rebuild_strict : bool
update on Variables. If False (default), perform them True (Default) is the safer and better tested setting, in which case
all. Else, perform automatic updates on all Variables that are `givens` must substitute new variables with the same Type as the
neither in "updates" nor in "no_default_updates". variables they replace.
False is a you-better-know-what-you-are-doing setting, that permits
:param name: an optional name for this function. The profile mode `givens` to replace variables with new variables of any Type.
will print the time spent in this function. The consequence of changing a Type is that all results depending on that
variable may have a different Type too (the graph is rebuilt from inputs
:param rebuild_strict: True (Default) is the safer and better to outputs). If one of the new types does not make sense for one of the
tested setting, in which case `givens` must substitute new Ops in the graph, an Exception will be raised.
variables with the same Type as the variables they replace. allow_input_downcast : bool or None
False is a you-better-know-what-you-are-doing setting, that True means that the values passed as inputs when calling the function
permits `givens` to replace variables with new variables of can be silently downcasted to fit the dtype of the corresponding
any Type. The consequence of changing a Type is that all Variable, which may lose precision. False means that it will only be
results depending on that variable may have a different Type cast to a more general, or precise, type. None (default) is almost like
too (the graph is rebuilt from inputs to outputs). If one of False, but allows downcasting of Python float scalars to floatX.
the new types does not make sense for one of the Ops in the profile : None, True, or ProfileStats instance
graph, an Exception will be raised. Accumulate profiling information into a given ProfileStats instance.
If argument is `True` then a new ProfileStats instance will be used.
:type allow_input_downcast: Boolean or None This profiling object will be available via self.profile.
:param allow_input_downcast: True means that the values passed as on_unused_input
inputs when calling the function can be silently downcasted to What to do if a variable in the 'inputs' list is not used in the graph.
fit the dtype of the corresponding Variable, which may lose Possible values are 'raise', 'warn', 'ignore' and None.
precision. False means that it will only be cast to a more
general, or precise, type. None (default) is almost like Returns
False, but allows downcasting of Python float scalars to -------
floatX. Function instance
A callable object that will compute the outputs (given the inputs) and
:type profile: None, True, or ProfileStats instance update the implicit function arguments according to the `updates`.
:param profile: accumulate profiling information into a given
ProfileStats instance. If argument is `True` then a new Notes
ProfileStats instance will be used. This profiling object -----
will be available via self.profile. Regarding givens: Be careful to make sure that these
substitutions are independent--behaviour when Var1 of one pair
:param on_unused_input: What to do if a variable in the 'inputs' appears in the graph leading to Var2 in another expression is
list is not used in the graph. Possible values are 'raise', undefined. Replacements specified with givens are different
'warn', 'ignore' and None. from optimizations in that Var2 is not expected to be
equivalent to Var1.
:rtype: Function instance
:returns: a callable object that will compute the outputs (given
the inputs) and update the implicit function arguments
according to the `updates`.
:note: Regarding givens: Be careful to make sure that these
substitutions are independent--behaviour when Var1 of one pair
appears in the graph leading to Var2 in another expression is
undefined. Replacements specified with givens are different
from optimizations in that Var2 is not expected to be
equivalent to Var1.
Internal documentation: Internal documentation:
...@@ -214,6 +206,7 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None, ...@@ -214,6 +206,7 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
was easier to develop the VM in Python then translate it to C instead was easier to develop the VM in Python then translate it to C instead
of just writing it in C from scratch. of just writing it in C from scratch.
CVM stands for C Virtual Machine. CVM stands for C Virtual Machine.
""" """
if isinstance(outputs, dict): if isinstance(outputs, dict):
output_items = list(outputs.items()) output_items = list(outputs.items())
......
差异被折叠。
"""WRITEME """
WRITEME
""" """
from __future__ import print_function from __future__ import print_function
import logging import logging
...@@ -34,8 +36,9 @@ AddConfigVar('optimizer_requiring', ...@@ -34,8 +36,9 @@ AddConfigVar('optimizer_requiring',
def check_equal(x, y): def check_equal(x, y):
""" """
Returns True iff x[0] and y[0] are equal (checks the dtype and Returns True iff x[0] and y[0] are equal (checks the dtype and shape if x
shape if x and y are numpy.ndarray instances). Used internally. and y are numpy.ndarray instances). Used internally.
""" """
# I put the import here to allow using theano without scipy. # I put the import here to allow using theano without scipy.
import scipy.sparse as sp import scipy.sparse as sp
...@@ -125,17 +128,19 @@ def register_optimizer(name, opt): ...@@ -125,17 +128,19 @@ def register_optimizer(name, opt):
class AddDestroyHandler(gof.Optimizer): class AddDestroyHandler(gof.Optimizer):
"""This optimizer performs two important functions: """
This optimizer performs two important functions:
1) It has a 'requirement' of the destroyhandler. This means that the fgraph 1) It has a 'requirement' of the destroyhandler. This means that the fgraph
will include it as a feature for this optimization, and keep this feature will include it as a feature for this optimization, and keep this feature
enabled for subsequent optimizations. All optimizations that work inplace enabled for subsequent optimizations. All optimizations that work inplace
on any of their inputs must run *after* this optimization to ensure that on any of their inputs must run *after* this optimization to ensure that
the DestroyHandler has been included in the fgraph. the DestroyHandler has been included in the fgraph.
2) It tries to replace each output with an Op that purports to destroy it 2) It tries to replace each output with an Op that purports to destroy it
(but it won't I promise). If this replacement succeeds it means that (but it won't I promise). If this replacement succeeds it means that
there is a bug in theano. It should not be possible to destroy outputs. there is a bug in theano. It should not be possible to destroy outputs.
""" """
def apply(self, fgraph): def apply(self, fgraph):
for o in fgraph.outputs: for o in fgraph.outputs:
...@@ -157,11 +162,13 @@ class AddDestroyHandler(gof.Optimizer): ...@@ -157,11 +162,13 @@ class AddDestroyHandler(gof.Optimizer):
class AddNoOutputFromInplace(gof.Optimizer): class AddNoOutputFromInplace(gof.Optimizer):
"""This optimizer adds to the fgraph a feature that will prevent outputs """
This optimizer adds to the fgraph a feature that will prevent outputs
of a fgraph to be created by performing inplace operations on intermediary of a fgraph to be created by performing inplace operations on intermediary
variables. This is useful when the outputs of the fgraph are preallocated variables. This is useful when the outputs of the fgraph are preallocated
to prevent useless copying of the data. Currently, scan preallocates its to prevent useless copying of the data. Currently, scan preallocates its
outputs outputs
""" """
def add_requirements(self, fgraph): def add_requirements(self, fgraph):
super(AddNoOutputFromInplace, self).add_requirements(fgraph) super(AddNoOutputFromInplace, self).add_requirements(fgraph)
...@@ -169,10 +176,12 @@ class AddNoOutputFromInplace(gof.Optimizer): ...@@ -169,10 +176,12 @@ class AddNoOutputFromInplace(gof.Optimizer):
class PrintCurrentFunctionGraph(gof.Optimizer): class PrintCurrentFunctionGraph(gof.Optimizer):
"""This optimizer is for debugging. """
This optimizer is for debugging.
Toss it into the optimization pipeline to see the state of things at any Toss it into the optimization pipeline to see the state of things at any
given point. given point.
""" """
def __init__(self, header): def __init__(self, header):
self.header = header self.header = header
...@@ -233,18 +242,23 @@ optdb.register('merge3', gof.MergeOptimizer(), ...@@ -233,18 +242,23 @@ optdb.register('merge3', gof.MergeOptimizer(),
class Mode(object): class Mode(object):
""" """
The Mode represents a way to optimize and then link a computation The Mode represents a way to optimize and then link a computation graph.
graph.
Parameters
* optimizer -> a structure of type Optimizer. An Optimizer may ----------
simplify the math, put similar computations together, improve optimizer : a structure of type Optimizer
numerical stability and various other improvements. An Optimizer may simplify the math, put similar computations together,
* linker -> a structure of type Linker. A Linker decides which improve numerical stability and various other improvements.
implementations to use (C or Python, for example) and how to linker : a structure of type Linker
string them together to perform the computation. A Linker decides which implementations to use (C or Python, for example)
and how to string them together to perform the computation.
See predefined_linkers, predefined_optimizers and also
predefined_modes. See Also
--------
predefined_linkers
predefined_optimizers
predefined_modes
""" """
def __init__(self, linker=None, optimizer='default'): def __init__(self, linker=None, optimizer='default'):
...@@ -326,6 +340,7 @@ class Mode(object): ...@@ -326,6 +340,7 @@ class Mode(object):
Keyword arguments can be provided for the linker, Keyword arguments can be provided for the linker,
in which case its `clone` method will be called with these in which case its `clone` method will be called with these
arguments. arguments.
""" """
new_linker = self.linker.clone(**link_kwargs) new_linker = self.linker.clone(**link_kwargs)
new_optimizer = self.provided_optimizer new_optimizer = self.provided_optimizer
...@@ -412,7 +427,10 @@ def get_default_mode(): ...@@ -412,7 +427,10 @@ def get_default_mode():
def register_mode(name, mode): def register_mode(name, mode):
"""Add a `Mode` which can be referred to by `name` in `function`.""" """
Add a `Mode` which can be referred to by `name` in `function`.
"""
if name in predefined_modes: if name in predefined_modes:
raise ValueError('Mode name already taken: %s' % name) raise ValueError('Mode name already taken: %s' % name)
predefined_modes[name] = mode predefined_modes[name] = mode
...@@ -8,7 +8,6 @@ from theano.compile.mode import Mode ...@@ -8,7 +8,6 @@ from theano.compile.mode import Mode
class MonitorMode(Mode): class MonitorMode(Mode):
""" """
`MonitorMode` is a debug mode to easily step through function execution. `MonitorMode` is a debug mode to easily step through function execution.
...@@ -19,28 +18,28 @@ class MonitorMode(Mode): ...@@ -19,28 +18,28 @@ class MonitorMode(Mode):
A typical use case is to detect the introduction of NaN values in a graph. A typical use case is to detect the introduction of NaN values in a graph.
For an example of such a use case, see doc/tutorial/debug_faq.txt. For an example of such a use case, see doc/tutorial/debug_faq.txt.
Parameters
----------
pre_func
A function to call before executing a thunk, with arguments:
- the thunk index
- the Apply node
- the thunk to be called
post_func
A function to call after executing a thunk, with the same three
arguments as `pre_func`.
optimizer
The optimizer to use. One may use for instance 'fast_compile' to skip
optimizations.
linker
DO NOT USE. This mode uses its own linker. The parameter is needed to
allow selecting optimizers to use.
""" """
def __init__(self, pre_func=None, post_func=None, def __init__(self, pre_func=None, post_func=None,
optimizer='default', linker=None): optimizer='default', linker=None):
"""
Constructor.
:param pre_func: A function to call before executing a thunk, with
arguments:
- the thunk index
- the Apply node
- the thunk to be called
:param post_func: A function to call after executing a thunk, with the
same three arguments as `pre_func`.
:param optimizer: The optimizer to use. One may use for instance
'fast_compile' to skip optimizations.
:param linker: DO NOT USE. This mode uses its own linker.
The parameter is needed to allow selecting optimizers to use.
"""
self.pre_func = pre_func self.pre_func = pre_func
self.post_func = post_func self.post_func = post_func
wrap_linker = theano.gof.WrapLinkerMany([theano.gof.OpWiseCLinker()], wrap_linker = theano.gof.WrapLinkerMany([theano.gof.OpWiseCLinker()],
...@@ -67,6 +66,7 @@ class MonitorMode(Mode): ...@@ -67,6 +66,7 @@ class MonitorMode(Mode):
def eval(self, i, node, fn): def eval(self, i, node, fn):
""" """
The method that calls the thunk `fn`. The method that calls the thunk `fn`.
""" """
if self.pre_func is not None: if self.pre_func is not None:
self.pre_func(i, node, fn) self.pre_func(i, node, fn)
...@@ -96,9 +96,9 @@ class MonitorMode(Mode): ...@@ -96,9 +96,9 @@ class MonitorMode(Mode):
""" """
Create a new instance of this Mode. Create a new instance of this Mode.
Keyword arguments can be provided for the linker, Keyword arguments can be provided for the linker, but they will be
but they will be ignored, because MonitorMode needs ignored, because MonitorMode needs to use its own linker.
to use its own linker.
""" """
new_mode = type(self)(pre_func=self.pre_func, new_mode = type(self)(pre_func=self.pre_func,
post_func=self.post_func, post_func=self.post_func,
......
...@@ -16,11 +16,14 @@ def flatten(l): ...@@ -16,11 +16,14 @@ def flatten(l):
Parameters Parameters
---------- ----------
l : List/tuple/other objects, might be nested. l : list/tuple/other objects
Might be nested.
Returns Returns
------- -------
A flattened list of objects object
A flattened list of objects.
""" """
if isinstance(l, (list, tuple, collections.ValuesView)): if isinstance(l, (list, tuple, collections.ValuesView)):
rval = [] rval = []
...@@ -53,6 +56,7 @@ def contains_nan(arr): ...@@ -53,6 +56,7 @@ def contains_nan(arr):
This approach is faster and more memory efficient than the obvious This approach is faster and more memory efficient than the obvious
alternative, calling `np.any(np.isnan(ndarray))`, which requires the alternative, calling `np.any(np.isnan(ndarray))`, which requires the
construction of a boolean array with the same shape as the input array. construction of a boolean array with the same shape as the input array.
""" """
if isinstance(arr, theano.gof.type.CDataType._cdata_type): if isinstance(arr, theano.gof.type.CDataType._cdata_type):
return False return False
...@@ -81,6 +85,7 @@ def contains_inf(arr): ...@@ -81,6 +85,7 @@ def contains_inf(arr):
This approach is more memory efficient than the obvious alternative, This approach is more memory efficient than the obvious alternative,
calling `np.any(np.isinf(ndarray))`, which requires the construction of a calling `np.any(np.isinf(ndarray))`, which requires the construction of a
boolean array with the same shape as the input array. boolean array with the same shape as the input array.
""" """
if isinstance(arr, theano.gof.type.CDataType._cdata_type): if isinstance(arr, theano.gof.type.CDataType._cdata_type):
return False return False
...@@ -97,14 +102,16 @@ class NanGuardMode(Mode): ...@@ -97,14 +102,16 @@ class NanGuardMode(Mode):
Parameters Parameters
---------- ----------
nan_is_error : bool nan_is_error : bool
If True, raise an error anytime a NaN is encountered If True, raise an error anytime a NaN is encountered.
inf_is_error: bool inf_is_error : bool
If True, raise an error anytime an Inf is encountered. Note that some If True, raise an error anytime an Inf is encountered. Note that some
pylearn2 modules currently use np.inf as a default value (e.g. pylearn2 modules currently use np.inf as a default value (e.g.
mlp.max_pool) and these will cause an error if inf_is_error is True. mlp.max_pool) and these will cause an error if inf_is_error is True.
big_is_error: bool big_is_error : bool
If True, raise an error when a value greater than 1e10 is encountered. If True, raise an error when a value greater than 1e10 is encountered.
""" """
def __init__(self, nan_is_error, inf_is_error, big_is_error=True): def __init__(self, nan_is_error, inf_is_error, big_is_error=True):
if cuda.cuda_available: if cuda.cuda_available:
self.guard_input = cuda.fvector('nan_guard') self.guard_input = cuda.fvector('nan_guard')
...@@ -135,12 +142,13 @@ class NanGuardMode(Mode): ...@@ -135,12 +142,13 @@ class NanGuardMode(Mode):
var : numpy.ndarray var : numpy.ndarray
The value to be checked. The value to be checked.
nd : theano.gof.Apply nd : theano.gof.Apply
The Apply node being executed The Apply node being executed.
f : callable f : callable
The thunk for the apply node The thunk for the apply node.
is_input : bool is_input : bool
If True, `var` is an input to `nd`. If True, `var` is an input to `nd`.
If False, it is an output. If False, it is an output.
""" """
error = False error = False
if nan_is_error: if nan_is_error:
...@@ -193,15 +201,18 @@ class NanGuardMode(Mode): ...@@ -193,15 +201,18 @@ class NanGuardMode(Mode):
def nan_check(i, node, fn): def nan_check(i, node, fn):
""" """
Runs `fn` while checking its inputs and outputs for NaNs / Infs Runs `fn` while checking its inputs and outputs for NaNs / Infs.
Parameters Parameters
---------- ----------
i : currently ignored (TODO: determine why it is here or remove) i :
Currently ignored.
TODO: determine why it is here or remove.
node : theano.gof.Apply node : theano.gof.Apply
The Apply node currently being executed The Apply node currently being executed.
fn : callable fn : callable
The thunk to execute for this Apply node The thunk to execute for this Apply node.
""" """
inputs = fn.inputs inputs = fn.inputs
# TODO: figure out why individual inputs are themselves lists # TODO: figure out why individual inputs are themselves lists
......
差异被折叠。
差异被折叠。
...@@ -122,7 +122,10 @@ class ProfileMode(Mode): ...@@ -122,7 +122,10 @@ class ProfileMode(Mode):
profile_stats)) profile_stats))
def function_maker(self, i, o, m, *args, **kwargs): def function_maker(self, i, o, m, *args, **kwargs):
"""Return an instance of `Profile_Maker` which initializes the count""" """
Return an instance of `Profile_Maker` which initializes the count.
"""
assert m is self assert m is self
return Profile_Maker(i, o, self, *args, **kwargs) return Profile_Maker(i, o, self, *args, **kwargs)
...@@ -147,7 +150,9 @@ class ProfileMode(Mode): ...@@ -147,7 +150,9 @@ class ProfileMode(Mode):
self.profile_stats = profile_stats self.profile_stats = profile_stats
def profile_thunk(i, node, th): def profile_thunk(i, node, th):
""" Profile only the execution time """
Profile only the execution time.
""" """
global run_cthunk global run_cthunk
if hasattr(th, 'cthunk'): if hasattr(th, 'cthunk'):
...@@ -169,7 +174,9 @@ class ProfileMode(Mode): ...@@ -169,7 +174,9 @@ class ProfileMode(Mode):
self.apply_time[node] += max(dt, 1e-14) self.apply_time[node] += max(dt, 1e-14)
def profile_thunk2(i, node, th): def profile_thunk2(i, node, th):
""" Profile the execution time and the memory size. """
Profile the execution time and the memory size.
""" """
global run_cthunk global run_cthunk
if hasattr(th, 'cthunk'): if hasattr(th, 'cthunk'):
...@@ -211,7 +218,8 @@ class ProfileMode(Mode): ...@@ -211,7 +218,8 @@ class ProfileMode(Mode):
self.fn_time = 0 self.fn_time = 0
def print_summary(self, **kwargs): def print_summary(self, **kwargs):
""" Print 3 summaries that show where time is spent. The first shows """
Print 3 summaries that show where time is spent. The first shows
an Apply-wise summary, the second an Op-wise summary and the an Apply-wise summary, the second an Op-wise summary and the
third a type-Op-wise summary. third a type-Op-wise summary.
...@@ -235,10 +243,13 @@ class ProfileMode(Mode): ...@@ -235,10 +243,13 @@ class ProfileMode(Mode):
There is a hack with the Op-wise summary. Go see it if you There is a hack with the Op-wise summary. Go see it if you
want to know more. want to know more.
:param kwargs: They are passed to print_summary_ expanded. Parameters
Currently there is n_apply_to_print, ----------
n_ops_to_print and min_memory_size that are kwargs
accepted. They are passed to print_summary_ expanded. Currently there is
n_apply_to_print, n_ops_to_print and min_memory_size that are
accepted.
""" """
compile_time = sum([ps.compile_time for ps compile_time = sum([ps.compile_time for ps
in self.profile_stats.values()]) in self.profile_stats.values()])
...@@ -280,18 +291,23 @@ class ProfileMode(Mode): ...@@ -280,18 +291,23 @@ class ProfileMode(Mode):
**kwargs) **kwargs)
def print_diff_summary(self, other, **kwargs): def print_diff_summary(self, other, **kwargs):
""" As print_summary, but print the difference on two different """
As print_summary, but print the difference on two different
profile mode. profile mode.
TODO: Also we don't print the Apply-wise summary as it doesn't TODO: Also we don't print the Apply-wise summary as it doesn't
work for now. work for now.
TODO: make comparison with gpu code. TODO: make comparison with gpu code.
:param other: the other instance of ProfileMode that we want Parameters
to be compared to. ----------
:param kwargs: They are passed to print_summary_ expanded. other
The other instance of ProfileMode that we want to be compared to.
kwargs
They are passed to print_summary_ expanded.
Currently there is n_apply_to_print, n_ops_to_print and Currently there is n_apply_to_print, n_ops_to_print and
min_memory_size that are accepted. min_memory_size that are accepted.
""" """
def diff_dict(a_time, b_time_): def diff_dict(a_time, b_time_):
...@@ -343,13 +359,18 @@ class ProfileMode(Mode): ...@@ -343,13 +359,18 @@ class ProfileMode(Mode):
min_memory_size=config.ProfileMode.min_memory_size, min_memory_size=config.ProfileMode.min_memory_size,
): ):
""" """
do the actual printing of print_summary and print_diff_summary. Do the actual printing of print_summary and print_diff_summary.
:param n_apply_to_print: the number of apply to print. Default 15. Parameters
----------
n_apply_to_print
The number of apply to print. Default 15.
n_ops_to_print
The number of ops to print. Default 20.
min_memory_size
Don't print memory profile of apply whose outputs memory size is
lower than that.
:param n_ops_to_print: the number of ops to print. Default 20.
:param min_memory_size: Don't print memory profile of apply
whose outputs memory size is lower then that.
""" """
print("ProfileMode is deprecated! Use the new profiler.") print("ProfileMode is deprecated! Use the new profiler.")
...@@ -700,9 +721,9 @@ Test them first, as they are not guaranteed to always provide a speedup.""") ...@@ -700,9 +721,9 @@ Test them first, as they are not guaranteed to always provide a speedup.""")
""" """
Create a new instance of this Mode. Create a new instance of this Mode.
Keyword arguments can be provided for the linker, Keyword arguments can be provided for the linker, in which case its
in which case its `clone` method will be called with these `clone` method will be called with these arguments.
arguments.
""" """
new_linker = self.linker.clone(**link_kwargs) new_linker = self.linker.clone(**link_kwargs)
new_optimizer = self.provided_optimizer new_optimizer = self.provided_optimizer
...@@ -727,10 +748,11 @@ prof_mode_instance_to_print = [predefined_modes["PROFILE_MODE"]] ...@@ -727,10 +748,11 @@ prof_mode_instance_to_print = [predefined_modes["PROFILE_MODE"]]
def atexit_print_default_profile_mode(): def atexit_print_default_profile_mode():
"""Print the summary of the predefined mode ProfileMode if used. """
Print the summary of the predefined mode ProfileMode if used.
This allows the summary to be printed at exit when config.mode=ProfileMode.
This allows the summary to be printed at exit when
config.mode=ProfileMode
""" """
for prof_mode in prof_mode_instance_to_print: for prof_mode in prof_mode_instance_to_print:
if prof_mode.local_time > 0: if prof_mode.local_time > 0:
......
"""ProfileStats object for runtime and memory profiling. """
ProfileStats object for runtime and memory profiling.
""" """
from __future__ import print_function from __future__ import print_function
# #
...@@ -76,7 +78,9 @@ AddConfigVar('profiling.destination', ...@@ -76,7 +78,9 @@ AddConfigVar('profiling.destination',
def _atexit_print_fn(): def _atexit_print_fn():
"""Print ProfileStat objects in _atexit_print_list to _atexit_print_file """
Print ProfileStat objects in _atexit_print_list to _atexit_print_file.
""" """
to_sum = [] to_sum = []
...@@ -135,6 +139,16 @@ class ProfileStats(object): ...@@ -135,6 +139,16 @@ class ProfileStats(object):
""" """
Object to store runtime and memory profiling information for all of Object to store runtime and memory profiling information for all of
Theano's operations: compilation, optimization, execution. Theano's operations: compilation, optimization, execution.
Parameters
----------
atexit_print : bool
True means that this object will be printed to stderr (using .summary())
at the end of the program.
**kwargs : misc initializers
These should (but need not) match the names of the class vars declared
in this class.
""" """
# #
...@@ -212,12 +226,6 @@ class ProfileStats(object): ...@@ -212,12 +226,6 @@ class ProfileStats(object):
# param is called flag_time_thunks because most other attributes with time # param is called flag_time_thunks because most other attributes with time
# in the name are times *of* something, rather than configuration flags. # in the name are times *of* something, rather than configuration flags.
def __init__(self, atexit_print=True, flag_time_thunks=None, **kwargs): def __init__(self, atexit_print=True, flag_time_thunks=None, **kwargs):
"""
atexit_print - bool. True means that this object will be printed to
stderr (using .summary()) at the end of the program.
**kwargs - misc initializers. These should (but need not) match the
names of the class vars declared in this class.
"""
if (hasattr(theano, 'sandbox') and if (hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and hasattr(theano.sandbox, 'cuda') and
theano.sandbox.cuda.cuda_enabled): theano.sandbox.cuda.cuda_enabled):
...@@ -250,7 +258,10 @@ class ProfileStats(object): ...@@ -250,7 +258,10 @@ class ProfileStats(object):
_atexit_registered = True _atexit_registered = True
def class_time(self): def class_time(self):
"""dict op -> total time on thunks""" """
dict op -> total time on thunks
"""
# timing is stored by node, we compute timing by class on demand # timing is stored by node, we compute timing by class on demand
rval = {} rval = {}
for node, t in iteritems(self.apply_time): for node, t in iteritems(self.apply_time):
...@@ -260,7 +271,10 @@ class ProfileStats(object): ...@@ -260,7 +271,10 @@ class ProfileStats(object):
return rval return rval
def class_callcount(self): def class_callcount(self):
"""dict op -> total number of thunk calls""" """
dict op -> total number of thunk calls
"""
# timing is stored by node, we compute timing by class on demand # timing is stored by node, we compute timing by class on demand
rval = {} rval = {}
for node, count in iteritems(self.apply_callcount): for node, count in iteritems(self.apply_callcount):
...@@ -270,7 +284,10 @@ class ProfileStats(object): ...@@ -270,7 +284,10 @@ class ProfileStats(object):
return rval return rval
def class_nodes(self): def class_nodes(self):
"""dict op -> total number of nodes""" """
dict op -> total number of nodes
"""
# timing is stored by node, we compute timing by class on demand # timing is stored by node, we compute timing by class on demand
rval = {} rval = {}
for node, count in iteritems(self.apply_callcount): for node, count in iteritems(self.apply_callcount):
...@@ -280,7 +297,10 @@ class ProfileStats(object): ...@@ -280,7 +297,10 @@ class ProfileStats(object):
return rval return rval
def class_impl(self): def class_impl(self):
"""dict op -> total number of nodes""" """
dict op -> total number of nodes
"""
# timing is stored by node, we compute timing by class on demand # timing is stored by node, we compute timing by class on demand
rval = {} rval = {}
for node in self.apply_callcount: for node in self.apply_callcount:
...@@ -295,7 +315,10 @@ class ProfileStats(object): ...@@ -295,7 +315,10 @@ class ProfileStats(object):
return rval return rval
def op_time(self): def op_time(self):
"""dict op -> total time on thunks""" """
dict op -> total time on thunks
"""
# timing is stored by node, we compute timing by Op on demand # timing is stored by node, we compute timing by Op on demand
rval = {} rval = {}
for node, t in iteritems(self.apply_time): for node, t in iteritems(self.apply_time):
...@@ -304,7 +327,10 @@ class ProfileStats(object): ...@@ -304,7 +327,10 @@ class ProfileStats(object):
return rval return rval
def fill_node_total_time(self, node, total_times): def fill_node_total_time(self, node, total_times):
"""node -> fill total time icluding its parents (returns nothing)""" """
node -> fill total time including its parents (returns nothing)
"""
# timing is stored by node, we compute total time on demand # timing is stored by node, we compute total time on demand
total = self.apply_time[node] total = self.apply_time[node]
for parent in node.get_parents(): for parent in node.get_parents():
...@@ -315,7 +341,10 @@ class ProfileStats(object): ...@@ -315,7 +341,10 @@ class ProfileStats(object):
total_times[node] = total total_times[node] = total
def compute_total_times(self): def compute_total_times(self):
"""dict op -> total time icluding the time for parents""" """
dict op -> total time including the time for parents
"""
rval = {} rval = {}
for node in self.apply_time: for node in self.apply_time:
if node not in rval: if node not in rval:
...@@ -323,7 +352,10 @@ class ProfileStats(object): ...@@ -323,7 +352,10 @@ class ProfileStats(object):
return rval return rval
def op_callcount(self): def op_callcount(self):
"""dict op -> total number of thunk calls""" """
dict op -> total number of thunk calls
"""
# timing is stored by node, we compute timing by Op on demand # timing is stored by node, we compute timing by Op on demand
rval = {} rval = {}
for node, count in iteritems(self.apply_callcount): for node, count in iteritems(self.apply_callcount):
...@@ -332,7 +364,10 @@ class ProfileStats(object): ...@@ -332,7 +364,10 @@ class ProfileStats(object):
return rval return rval
def op_nodes(self): def op_nodes(self):
"""dict op -> total number of nodes""" """
dict op -> total number of nodes
"""
# timing is stored by node, we compute timing by Op on demand # timing is stored by node, we compute timing by Op on demand
rval = {} rval = {}
for node, count in iteritems(self.apply_callcount): for node, count in iteritems(self.apply_callcount):
...@@ -341,7 +376,10 @@ class ProfileStats(object): ...@@ -341,7 +376,10 @@ class ProfileStats(object):
return rval return rval
def op_impl(self): def op_impl(self):
"""dict op -> 'C' or 'Py' depending how the op is implemented""" """
dict op -> 'C' or 'Py' depending how the op is implemented
"""
# timing is stored by node, we compute timing by Op on demand # timing is stored by node, we compute timing by Op on demand
rval = {} rval = {}
for node in self.apply_callcount: for node in self.apply_callcount:
...@@ -711,21 +749,23 @@ class ProfileStats(object): ...@@ -711,21 +749,23 @@ class ProfileStats(object):
def count_running_memory(order, fgraph, nodes_mem): def count_running_memory(order, fgraph, nodes_mem):
""" """
Calculate memory with specific node order Calculate memory with specific node order.
Return a list including the following values Return a list including the following values
1. node_memory_size 1. node_memory_size
Sum of the size of all variables that actually allocate Sum of the size of all variables that actually allocate
memory (excluding views, and inplace); memory (excluding views, and inplace).
2. running_memory_size 2. running_memory_size
The memory allocated after the current apply node The memory allocated after the current apply node.
3. running_max_memory_size 3. running_max_memory_size
The maximum of running_memory_size during the function The maximum of running_memory_size during the function.
4. node_memory_saved_by_view 4. node_memory_saved_by_view
The sum of memory saved by returning view instead of new The sum of memory saved by returning view instead of new
allocation allocation.
5. node_memory_saved_by_inplace 5. node_memory_saved_by_inplace
The sum of memory saved by reusing the input instead of The sum of memory saved by reusing the input instead of
new allocation new allocation.
""" """
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType
# Initial Mem info values [CPU, GPU] # Initial Mem info values [CPU, GPU]
...@@ -874,10 +914,14 @@ class ProfileStats(object): ...@@ -874,10 +914,14 @@ class ProfileStats(object):
def min_memory_generator(executable_nodes, viewed_by, view_of): def min_memory_generator(executable_nodes, viewed_by, view_of):
""" """
Generate all valid node order from node_list Generate all valid node order from node_list and compute its
and compute its memory peak. memory peak.
Parameters
----------
executable_nodes
Set of executable nodes.
:param executable_nodes: Set of executable nodes
""" """
global mem_count, mem_bound, max_mem_count global mem_count, mem_bound, max_mem_count
...@@ -1255,9 +1299,13 @@ if False: # old code still to be ported from ProfileMode ...@@ -1255,9 +1299,13 @@ if False: # old code still to be ported from ProfileMode
""" """
Print a readable summary of the stats. Print a readable summary of the stats.
param: n_apply_to_print the number of apply to print. Default 15. Parameters
----------
n_apply_to_print
The number of apply to print. Default 15.
n_ops_to_print
The number of ops to print. Default 20.
param: n_ops_to_print the number of ops to print. Default 20.
""" """
local_time = sum(self.apply_time.values()) local_time = sum(self.apply_time.values())
...@@ -1483,11 +1531,13 @@ if False: # old code still to be ported from ProfileMode ...@@ -1483,11 +1531,13 @@ if False: # old code still to be ported from ProfileMode
There is a hack with the Op-wise summary. Go see it if you want to know There is a hack with the Op-wise summary. Go see it if you want to know
more. more.
:param n_apply_to_print: the number of apply to print. Default 15, or Parameters
n_ops_to_print flag. ----------
n_apply_to_print
The number of apply to print. Default 15, or n_ops_to_print flag.
n_ops_to_print
The number of ops to print. Default 20, or n_apply_to_print flag.
:param n_ops_to_print: the number of ops to print. Default 20, or
n_apply_to_print flag.
""" """
fct_call_time = self.mode.fct_call_time fct_call_time = self.mode.fct_call_time
fct_call = self.mode.fct_call fct_call = self.mode.fct_call
...@@ -1517,12 +1567,15 @@ if False: # old code still to be ported from ProfileMode ...@@ -1517,12 +1567,15 @@ if False: # old code still to be ported from ProfileMode
now. now.
TODO: make comparison with gpu code. TODO: make comparison with gpu code.
:param other: the other instance of ProfileMode that we want to be Parameters
compared to. ----------
other
:param n_apply_to_print: the number of apply to print. Default 15. The other instance of ProfileMode that we want to be compared to.
n_apply_to_print
The number of apply to print. Default 15.
n_ops_to_print
The number of ops to print. Default 20.
:param n_ops_to_print: the number of ops to print. Default 20.
""" """
def diff_dict(a_time, b_time_): def diff_dict(a_time, b_time_):
......
"""Provide a simple user friendly API to Theano-managed memory""" """
Provide a simple user friendly API to Theano-managed memory.
"""
# Standard imports # Standard imports
import copy import copy
import logging import logging
...@@ -18,6 +21,32 @@ class SharedVariable(Variable): ...@@ -18,6 +21,32 @@ class SharedVariable(Variable):
Variable that is (defaults to being) shared between functions that Variable that is (defaults to being) shared between functions that
it appears in. it appears in.
Parameters
----------
name : str
The name for this variable (see `Variable`).
type : str
The type for this variable (see `Variable`).
value
A value to associate with this variable (a new container will be
created).
strict
True : assignments to .value will not be cast or copied, so they must
have the correct type.
allow_downcast
Only applies if `strict` is False.
True : allow assigned value to lose precision when cast during
assignment.
False : never allow precision loss.
None : only allow downcasting of a Python float to a scalar floatX.
container
The container to use for this variable. Illegal to pass this as well as
a value.
Notes
-----
For a more user-friendly constructor, see `shared`.
""" """
# Container object # Container object
...@@ -36,29 +65,6 @@ class SharedVariable(Variable): ...@@ -36,29 +65,6 @@ class SharedVariable(Variable):
def __init__(self, name, type, value, strict, def __init__(self, name, type, value, strict,
allow_downcast=None, container=None): allow_downcast=None, container=None):
"""
:param name: The name for this variable (see `Variable`).
:param type: The type for this variable (see `Variable`).
:param value: A value to associate with this variable (a new
container will be created).
:param strict: True -> assignments to .value will not be cast
or copied, so they must have the correct type.
:param allow_downcast: Only applies if `strict` is False.
True -> allow assigned value to lose precision when cast
during assignment.
False -> never allow precision loss.
None -> only allow downcasting of a Python float to a scalar floatX.
:param container: The container to use for this
variable. Illegal to pass this as well as a value.
:note: For more user-friendly constructor, see `shared`
"""
super(SharedVariable, self).__init__(type=type, name=name, super(SharedVariable, self).__init__(type=type, name=name,
owner=None, index=None) owner=None, index=None)
...@@ -79,18 +85,21 @@ class SharedVariable(Variable): ...@@ -79,18 +85,21 @@ class SharedVariable(Variable):
allow_downcast=allow_downcast) allow_downcast=allow_downcast)
def get_value(self, borrow=False, return_internal_type=False): def get_value(self, borrow=False, return_internal_type=False):
"""Get the non-symbolic value associated with this SharedVariable. """
Get the non-symbolic value associated with this SharedVariable.
:param borrow: True to permit returning of an object aliased Parameters
to internal memory. ----------
:param return_internal_type: True to permit the returning of borrow : bool
an arbitrary type object used internally to store the True to permit returning of an object aliased to internal memory.
shared variable. return_internal_type : bool
True to permit the returning of an arbitrary type object used
internally to store the shared variable.
Only with borrow=False and return_internal_type=True does this Only with borrow=False and return_internal_type=True does this function
function guarantee that you actually get the internal object. guarantee that you actually get the internal object.
But in that case, you may get different return types when But in that case, you may get different return types when using
using different compute devices. different compute devices.
""" """
if borrow: if borrow:
...@@ -99,14 +108,18 @@ class SharedVariable(Variable): ...@@ -99,14 +108,18 @@ class SharedVariable(Variable):
return copy.deepcopy(self.container.value) return copy.deepcopy(self.container.value)
def set_value(self, new_value, borrow=False): def set_value(self, new_value, borrow=False):
"""Set the non-symbolic value associated with this SharedVariable. """
Set the non-symbolic value associated with this SharedVariable.
:param borrow: Parameters
----------
borrow : bool
True to use the new_value directly, potentially creating problems True to use the new_value directly, potentially creating problems
related to aliased memory. related to aliased memory.
Changes to this value will be visible to all functions using Changes to this value will be visible to all functions using
this SharedVariable. this SharedVariable.
""" """
if borrow: if borrow:
self.container.value = new_value self.container.value = new_value
...@@ -114,15 +127,19 @@ class SharedVariable(Variable): ...@@ -114,15 +127,19 @@ class SharedVariable(Variable):
self.container.value = copy.deepcopy(new_value) self.container.value = copy.deepcopy(new_value)
def zero(self, borrow=False): def zero(self, borrow=False):
"""Set the values of a shared variable to 0. """
Set the values of a shared variable to 0.
:param borrow: Parameters
----------
borrow : bool
True to modify the value of a shared variable directly by using True to modify the value of a shared variable directly by using
its previous value. Potentially this can cause problems its previous value. Potentially this can cause problems
regarding to the aliased memory. regarding to the aliased memory.
Changes done with this function will be visible to all functions using Changes done with this function will be visible to all functions using
this SharedVariable. this SharedVariable.
""" """
if borrow: if borrow:
self.container.value[...] = 0 self.container.value[...] = 0
...@@ -183,7 +200,8 @@ def shared_constructor(ctor, remove=False): ...@@ -183,7 +200,8 @@ def shared_constructor(ctor, remove=False):
def shared(value, name=None, strict=False, allow_downcast=None, **kwargs): def shared(value, name=None, strict=False, allow_downcast=None, **kwargs):
"""Return a SharedVariable Variable, initialized with a copy or """
Return a SharedVariable Variable, initialized with a copy or
reference of `value`. reference of `value`.
This function iterates over This function iterates over
...@@ -196,23 +214,25 @@ def shared(value, name=None, strict=False, allow_downcast=None, **kwargs): ...@@ -196,23 +214,25 @@ def shared(value, name=None, strict=False, allow_downcast=None, **kwargs):
``theano.shared`` is a shortcut to this function. ``theano.shared`` is a shortcut to this function.
:note: By passing kwargs, you effectively limit the set of Notes
potential constructors to those that can accept those kwargs. -----
By passing kwargs, you effectively limit the set of potential constructors
to those that can accept those kwargs.
:note: Some shared variables have ``borrow`` as extra kwargs. Some shared variables have ``borrow`` as extra kwargs.
`See <http://deeplearning.net/software/theano/tutorial/aliasing.\ `See <http://deeplearning.net/software/theano/tutorial/aliasing.\
html#borrowing-when-creating-shared-variables>`_ for detail. html#borrowing-when-creating-shared-variables>`_ for details.
:note: Some shared variables have ``broadcastable`` as extra kwargs. Some shared variables have ``broadcastable`` as extra kwargs. As shared
As shared variable shapes can change, all dimensions default variable shapes can change, all dimensions default to not being
to not being broadcastable, even if ``value`` has a shape of 1 broadcastable, even if ``value`` has a shape of 1 along some dimension.
along some dimension. This parameter allows you to create This parameter allows you to create for example a `row` or `column` 2d
for example a `row` or `column` 2d tensor. tensor.
.. attribute:: constructors .. attribute:: constructors
A list of shared variable constructors that will be tried in reverse A list of shared variable constructors that will be tried in reverse
order. order.
""" """
...@@ -251,6 +271,9 @@ shared.constructors = [] ...@@ -251,6 +271,9 @@ shared.constructors = []
@shared_constructor @shared_constructor
def generic_constructor(value, name=None, strict=False, allow_downcast=None): def generic_constructor(value, name=None, strict=False, allow_downcast=None):
"""SharedVariable Constructor""" """
SharedVariable Constructor.
"""
return SharedVariable(type=generic, value=value, name=name, strict=strict, return SharedVariable(type=generic, value=value, name=name, strict=strict,
allow_downcast=allow_downcast) allow_downcast=allow_downcast)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论