提交 33a899b2 作者: Iban Harlouchet

numpydoc for theano/compile/profiling.py

上级 8d57251e
"""ProfileStats object for runtime and memory profiling. """
ProfileStats object for runtime and memory profiling.
""" """
from __future__ import print_function from __future__ import print_function
# #
...@@ -76,7 +78,9 @@ AddConfigVar('profiling.destination', ...@@ -76,7 +78,9 @@ AddConfigVar('profiling.destination',
def _atexit_print_fn(): def _atexit_print_fn():
"""Print ProfileStats objects in _atexit_print_list to _atexit_print_file """
Print ProfileStats objects in _atexit_print_list to _atexit_print_file.
""" """
to_sum = [] to_sum = []
...@@ -135,6 +139,16 @@ class ProfileStats(object): ...@@ -135,6 +139,16 @@ class ProfileStats(object):
""" """
Object to store runtime and memory profiling information for all of Object to store runtime and memory profiling information for all of
Theano's operations: compilation, optimization, execution. Theano's operations: compilation, optimization, execution.
Parameters
----------
atexit_print : bool
True means that this object will be printed to stderr (using .summary())
at the end of the program.
**kwargs : misc initializers
These should (but need not) match the names of the class vars declared
in this class.
""" """
# #
...@@ -212,12 +226,6 @@ class ProfileStats(object): ...@@ -212,12 +226,6 @@ class ProfileStats(object):
# param is called flag_time_thunks because most other attributes with time # param is called flag_time_thunks because most other attributes with time
# in the name are times *of* something, rather than configuration flags. # in the name are times *of* something, rather than configuration flags.
def __init__(self, atexit_print=True, flag_time_thunks=None, **kwargs): def __init__(self, atexit_print=True, flag_time_thunks=None, **kwargs):
"""
atexit_print - bool. True means that this object will be printed to
stderr (using .summary()) at the end of the program.
**kwargs - misc initializers. These should (but need not) match the
names of the class vars declared in this class.
"""
if (hasattr(theano, 'sandbox') and if (hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and hasattr(theano.sandbox, 'cuda') and
theano.sandbox.cuda.cuda_enabled): theano.sandbox.cuda.cuda_enabled):
...@@ -250,7 +258,10 @@ class ProfileStats(object): ...@@ -250,7 +258,10 @@ class ProfileStats(object):
_atexit_registered = True _atexit_registered = True
def class_time(self): def class_time(self):
"""dict op -> total time on thunks""" """
dict op -> total time on thunks
"""
# timing is stored by node, we compute timing by class on demand # timing is stored by node, we compute timing by class on demand
rval = {} rval = {}
for node, t in iteritems(self.apply_time): for node, t in iteritems(self.apply_time):
...@@ -260,7 +271,10 @@ class ProfileStats(object): ...@@ -260,7 +271,10 @@ class ProfileStats(object):
return rval return rval
def class_callcount(self): def class_callcount(self):
"""dict op -> total number of thunk calls""" """
dict op -> total number of thunk calls
"""
# timing is stored by node, we compute timing by class on demand # timing is stored by node, we compute timing by class on demand
rval = {} rval = {}
for node, count in iteritems(self.apply_callcount): for node, count in iteritems(self.apply_callcount):
...@@ -270,7 +284,10 @@ class ProfileStats(object): ...@@ -270,7 +284,10 @@ class ProfileStats(object):
return rval return rval
def class_nodes(self): def class_nodes(self):
"""dict op -> total number of nodes""" """
dict op -> total number of nodes
"""
# timing is stored by node, we compute timing by class on demand # timing is stored by node, we compute timing by class on demand
rval = {} rval = {}
for node, count in iteritems(self.apply_callcount): for node, count in iteritems(self.apply_callcount):
...@@ -280,7 +297,10 @@ class ProfileStats(object): ...@@ -280,7 +297,10 @@ class ProfileStats(object):
return rval return rval
def class_impl(self): def class_impl(self):
"""dict op -> 'C' or 'Py' depending how the op is implemented""" """
dict op -> 'C' or 'Py' depending how the op is implemented
"""
# timing is stored by node, we compute timing by class on demand # timing is stored by node, we compute timing by class on demand
rval = {} rval = {}
for node in self.apply_callcount: for node in self.apply_callcount:
...@@ -295,7 +315,10 @@ class ProfileStats(object): ...@@ -295,7 +315,10 @@ class ProfileStats(object):
return rval return rval
def op_time(self): def op_time(self):
"""dict op -> total time on thunks""" """
dict op -> total time on thunks
"""
# timing is stored by node, we compute timing by Op on demand # timing is stored by node, we compute timing by Op on demand
rval = {} rval = {}
for node, t in iteritems(self.apply_time): for node, t in iteritems(self.apply_time):
...@@ -304,7 +327,10 @@ class ProfileStats(object): ...@@ -304,7 +327,10 @@ class ProfileStats(object):
return rval return rval
def fill_node_total_time(self, node, total_times): def fill_node_total_time(self, node, total_times):
"""node -> fill total time including its parents (returns nothing)""" """
node -> fill total time including its parents (returns nothing)
"""
# timing is stored by node, we compute total time on demand # timing is stored by node, we compute total time on demand
total = self.apply_time[node] total = self.apply_time[node]
for parent in node.get_parents(): for parent in node.get_parents():
...@@ -315,7 +341,10 @@ class ProfileStats(object): ...@@ -315,7 +341,10 @@ class ProfileStats(object):
total_times[node] = total total_times[node] = total
def compute_total_times(self): def compute_total_times(self):
"""dict op -> total time including the time for parents""" """
dict op -> total time including the time for parents
"""
rval = {} rval = {}
for node in self.apply_time: for node in self.apply_time:
if node not in rval: if node not in rval:
...@@ -323,7 +352,10 @@ class ProfileStats(object): ...@@ -323,7 +352,10 @@ class ProfileStats(object):
return rval return rval
def op_callcount(self): def op_callcount(self):
"""dict op -> total number of thunk calls""" """
dict op -> total number of thunk calls
"""
# timing is stored by node, we compute timing by Op on demand # timing is stored by node, we compute timing by Op on demand
rval = {} rval = {}
for node, count in iteritems(self.apply_callcount): for node, count in iteritems(self.apply_callcount):
...@@ -332,7 +364,10 @@ class ProfileStats(object): ...@@ -332,7 +364,10 @@ class ProfileStats(object):
return rval return rval
def op_nodes(self): def op_nodes(self):
"""dict op -> total number of nodes""" """
dict op -> total number of nodes
"""
# timing is stored by node, we compute timing by Op on demand # timing is stored by node, we compute timing by Op on demand
rval = {} rval = {}
for node, count in iteritems(self.apply_callcount): for node, count in iteritems(self.apply_callcount):
...@@ -341,7 +376,10 @@ class ProfileStats(object): ...@@ -341,7 +376,10 @@ class ProfileStats(object):
return rval return rval
def op_impl(self): def op_impl(self):
"""dict op -> 'C' or 'Py' depending how the op is implemented""" """
dict op -> 'C' or 'Py' depending how the op is implemented
"""
# timing is stored by node, we compute timing by Op on demand # timing is stored by node, we compute timing by Op on demand
rval = {} rval = {}
for node in self.apply_callcount: for node in self.apply_callcount:
...@@ -711,21 +749,23 @@ class ProfileStats(object): ...@@ -711,21 +749,23 @@ class ProfileStats(object):
def count_running_memory(order, fgraph, nodes_mem): def count_running_memory(order, fgraph, nodes_mem):
""" """
Calculate memory with specific node order Calculate memory with specific node order.
Return a list including the following values Return a list including the following values
1. node_memory_size 1. node_memory_size
Sum of the size of all variables that actually allocate Sum of the size of all variables that actually allocate
memory (excluding views, and inplace); memory (excluding views, and inplace).
2. running_memory_size 2. running_memory_size
The memory allocated after the current apply node The memory allocated after the current apply node.
3. running_max_memory_size 3. running_max_memory_size
The maximum of running_memory_size during the function The maximum of running_memory_size during the function.
4. node_memory_saved_by_view 4. node_memory_saved_by_view
The sum of memory saved by returning view instead of new The sum of memory saved by returning view instead of new
allocation allocation.
5. node_memory_saved_by_inplace 5. node_memory_saved_by_inplace
The sum of memory saved by reusing the input instead of The sum of memory saved by reusing the input instead of
new allocation new allocation.
""" """
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType
# Initial Mem info values [CPU, GPU] # Initial Mem info values [CPU, GPU]
...@@ -874,10 +914,14 @@ class ProfileStats(object): ...@@ -874,10 +914,14 @@ class ProfileStats(object):
def min_memory_generator(executable_nodes, viewed_by, view_of): def min_memory_generator(executable_nodes, viewed_by, view_of):
""" """
Generate all valid node order from node_list Generate all valid node order from node_list and compute its
and compute its memory peak. memory peak.
Parameters
----------
executable_nodes
Set of executable nodes.
:param executable_nodes: Set of executable nodes
""" """
global mem_count, mem_bound, max_mem_count global mem_count, mem_bound, max_mem_count
...@@ -1255,9 +1299,13 @@ if False: # old code still to be ported from ProfileMode ...@@ -1255,9 +1299,13 @@ if False: # old code still to be ported from ProfileMode
""" """
Print a readable summary of the stats. Print a readable summary of the stats.
param: n_apply_to_print the number of apply to print. Default 15. Parameters
----------
n_apply_to_print
The number of apply to print. Default 15.
n_ops_to_print
The number of ops to print. Default 20.
param: n_ops_to_print the number of ops to print. Default 20.
""" """
local_time = sum(self.apply_time.values()) local_time = sum(self.apply_time.values())
...@@ -1483,11 +1531,13 @@ if False: # old code still to be ported from ProfileMode ...@@ -1483,11 +1531,13 @@ if False: # old code still to be ported from ProfileMode
There is a hack with the Op-wise summary. Go see it if you want to know There is a hack with the Op-wise summary. Go see it if you want to know
more. more.
:param n_apply_to_print: the number of apply to print. Default 15, or Parameters
n_ops_to_print flag. ----------
n_apply_to_print
The number of apply to print. Default 15, or n_ops_to_print flag.
n_ops_to_print
The number of ops to print. Default 20, or n_apply_to_print flag.
:param n_ops_to_print: the number of ops to print. Default 20, or
n_apply_to_print flag.
""" """
fct_call_time = self.mode.fct_call_time fct_call_time = self.mode.fct_call_time
fct_call = self.mode.fct_call fct_call = self.mode.fct_call
...@@ -1517,12 +1567,15 @@ if False: # old code still to be ported from ProfileMode ...@@ -1517,12 +1567,15 @@ if False: # old code still to be ported from ProfileMode
now. now.
TODO: make comparison with gpu code. TODO: make comparison with gpu code.
:param other: the other instance of ProfileMode that we want to be Parameters
compared to. ----------
other
:param n_apply_to_print: the number of apply to print. Default 15. The other instance of ProfileMode that we want to be compared to.
n_apply_to_print
The number of apply to print. Default 15.
n_ops_to_print
The number of ops to print. Default 20.
:param n_ops_to_print: the number of ops to print. Default 20.
""" """
def diff_dict(a_time, b_time_): def diff_dict(a_time, b_time_):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论