提交 2dc0af2f authored 作者: Brandon T. Willard's avatar Brandon T. Willard 提交者: Brandon T. Willard

Change more uses of "optimize" terminology

上级 45f48ae6
"""
Driver of graph construction, optimization, and linking.
"""
"""Objects that orchestrate graph construction, rewriting, and linking."""
import copy
import copyreg
......@@ -753,9 +750,8 @@ class Function:
# cause problems.
on_unused_input="ignore",
function_builder=maker.function_builder,
# As this is an optimized graph, it
# can contain inplace. DebugMode check
# that.
# As this is a rewritten graph, it can contain inplace. DebugMode
# check that.
accept_inplace=True,
no_fgraph_prep=True,
).create(input_storage, storage_map=new_storage_map)
......@@ -1182,7 +1178,7 @@ def insert_deepcopy(fgraph, wrapped_inputs, wrapped_outputs):
This loop was inserted to remove aliasing between outputs when they all
evaluate to the same value. Originally it was OK for outputs to be aliased,
but some of the outputs can be shared variables, and it is not good for shared
variables to be aliased. It might be possible to optimize this by making
variables to be aliased. It might be possible to rewrite this by making
sure there is no aliasing only between shared variables.
If some outputs are constant, we add deep copy to respect the memory
......@@ -1279,7 +1275,7 @@ class FunctionMaker:
"""
`FunctionMaker` is the class to `create` `Function` instances.
This class has the fgraph, the optimizer, and the linker. When
This class has the fgraph, the rewriter, and the linker. When
copying a `Function`, there is no need to duplicate the
`FunctionMaker` instance. Deepcopy still copies both, which can
result in re-compilation.
......@@ -1292,7 +1288,7 @@ class FunctionMaker:
functions produced by FunctionMaker will return their output value
directly.
mode : Mode instance
Telling FunctionMaker how to optimize and link. None means to use the
Telling FunctionMaker how to rewrite and link. None means to use the
`config.mode`.
accept_inplace : bool
True iff it is acceptable to have inplace operations in the graph from
......@@ -1395,44 +1391,44 @@ class FunctionMaker:
@staticmethod
def prepare_fgraph(
inputs, outputs, additional_outputs, fgraph, optimizer, linker, profile
inputs, outputs, additional_outputs, fgraph, rewriter, linker, profile
):
try:
start_optimizer = time.time()
start_rewriter = time.time()
optimizer_profile = None
opt_time = None
rewriter_profile = None
rewrite_time = None
with config.change_flags(
compute_test_value=config.compute_test_value_opt,
traceback__limit=config.traceback__compile_limit,
):
optimizer_profile = optimizer(fgraph)
rewriter_profile = rewriter(fgraph)
end_optimizer = time.time()
opt_time = end_optimizer - start_optimizer
_logger.debug(f"Optimizing took {opt_time:f} seconds")
end_rewriter = time.time()
rewrite_time = end_rewriter - start_rewriter
_logger.debug(f"Rewriting took {rewrite_time:f} seconds")
# Add deep copy to respect the memory interface
insert_deepcopy(fgraph, inputs, outputs + additional_outputs)
finally:
# If the optimizer got interrupted
if opt_time is None:
end_optimizer = time.time()
opt_time = end_optimizer - start_optimizer
# If the rewriter got interrupted
if rewrite_time is None:
end_rewriter = time.time()
rewrite_time = end_rewriter - start_rewriter
aesara.compile.profiling.total_graph_opt_time += opt_time
aesara.compile.profiling.total_graph_rewrite_time += rewrite_time
if profile:
if optimizer_profile is None and hasattr(optimizer, "pre_profile"):
optimizer_profile = optimizer.pre_profile
if rewriter_profile is None and hasattr(rewriter, "pre_profile"):
rewriter_profile = rewriter.pre_profile
profile.optimizer_time += opt_time
profile.rewriting_time += rewrite_time
if config.profile_optimizer:
profile.optimizer_profile = (optimizer, optimizer_profile)
profile.rewriter_profile = (rewriter, rewriter_profile)
elif config.profile_optimizer and profile is not False:
# If False, it means the profiling for that function was
# explicitly disabled
......@@ -1466,8 +1462,8 @@ class FunctionMaker:
):
# Save the provided mode, not the instantiated mode.
# The instantiated mode don't pickle and if we unpickle an Aesara
# function and it get re-compiled, we want the current optimizer to be
# used, not the optimizer when it was saved.
# function and it get re-compiled, we want the current rewriter to be
# used, not the rewriter when it was saved.
self.mode = mode
mode = aesara.compile.mode.get_mode(mode)
......@@ -1478,7 +1474,7 @@ class FunctionMaker:
if profile:
# This is very important:
# 1) We preload the cache here to not have its timing
# included in optimization that compile function.
# included with the rewrites.
# 2) Do not refresh the cache here by default. It causes
# too much execution time during testing as we compile
# much more functions then the number of compile c
......@@ -1515,11 +1511,11 @@ class FunctionMaker:
self.fgraph = fgraph
optimizer, linker = mode.optimizer, copy.copy(mode.linker)
rewriter, linker = mode.optimizer, copy.copy(mode.linker)
if not no_fgraph_prep:
self.prepare_fgraph(
inputs, outputs, found_updates, fgraph, optimizer, linker, profile
inputs, outputs, found_updates, fgraph, rewriter, linker, profile
)
assert len(fgraph.outputs) == len(outputs + found_updates)
......@@ -1715,7 +1711,7 @@ def orig_function(
time spent in this function.
accept_inplace : bool
True iff the graph can contain inplace operations prior to the
optimization phase (default is False).
rewrite phase (default is False).
profile : None or ProfileStats instance
on_unused_input : {'raise', 'warn', 'ignore', None}
What to do if a variable in the 'inputs' list is not used in the graph.
......
......@@ -190,7 +190,7 @@ optdb.register(
# The opt should not do anything that need shape inference.
# New nodes that don't have infer_shape need that the original node
# also don't have infer_shape
local_useless = LocalGroupDB(apply_all_opts=True, profile=True)
local_useless = LocalGroupDB(apply_all_rewrites=True, profile=True)
optdb.register(
"useless",
TopoDB(local_useless, failure_callback=NodeProcessingGraphRewriter.warn_inplace),
......@@ -276,20 +276,19 @@ del _tags
class Mode:
"""
The Mode represents a way to optimize and then link a computation graph.
"""A class that specifies the rewrites/optimizations used during function compilation.
Parameters
----------
optimizer: a structure of type Optimizer
optimizer
An Optimizer may simplify the math, put similar computations together,
improve numerical stability and various other improvements.
linker: a structure of type Linker
linker
A Linker decides which implementations to use (C or Python, for example)
and how to string them together to perform the computation.
db:
The ``RewriteDatabase`` used by this ``Mode``. Note: This value
is *not* part of a ``Mode`` instance's pickled state.
db
The `RewriteDatabase` used by this `Mode`. Note: This value
is *not* part of a `Mode` instance's pickled state.
See Also
--------
......
......@@ -45,7 +45,7 @@ logger = logging.getLogger("aesara.compile.profiling")
aesara_imported_time: float = time.time()
total_fct_exec_time: float = 0.0
total_graph_opt_time: float = 0.0
total_graph_rewrite_time: float = 0.0
total_time_linker: float = 0.0
_atexit_print_list: List["ProfileStats"] = []
......@@ -97,7 +97,7 @@ def _atexit_print_fn():
"fct_call_time",
"fct_callcount",
"vm_call_time",
"optimizer_time",
"rewriter_time",
"linker_time",
"validate_time",
"import_time",
......@@ -120,18 +120,18 @@ def _atexit_print_fn():
assert key not in cum_attr, (key, cum_attr)
cum_attr[key] = val
if cum.optimizer_profile and ps.optimizer_profile:
if cum.rewriter_profile and ps.rewriter_profile:
try:
merge = cum.optimizer_profile[0].merge_profile(
cum.optimizer_profile[1], ps.optimizer_profile[1]
merge = cum.rewriter_profile[0].merge_profile(
cum.rewriter_profile[1], ps.rewriter_profile[1]
)
assert len(merge) == len(cum.optimizer_profile[1])
cum.optimizer_profile = (cum.optimizer_profile[0], merge)
assert len(merge) == len(cum.rewriter_profile[1])
cum.rewriter_profile = (cum.rewriter_profile[0], merge)
except Exception as e:
print(e)
cum.optimizer_profile = None
cum.rewriter_profile = None
else:
cum.optimizer_profile = None
cum.rewriter_profile = None
cum.summary(
file=destination_file,
......@@ -149,7 +149,7 @@ def print_global_stats():
-- Time elapsed since Aesara was imported
-- Time spent inside Aesara functions
-- Time spent in compiling Aesara functions
-- on graph optimization
-- on graph rewriters
-- on linker
"""
......@@ -168,7 +168,7 @@ def print_global_stats():
f"Time elapsed since Aesara import = {time.time() - aesara_imported_time:6.3f}s, "
f"Time spent in Aesara functions = {total_fct_exec_time:6.3f}s, "
"Time spent compiling Aesara functions: "
f" optimization = {total_graph_opt_time:6.3f}s, linker = {total_time_linker:6.3f}s ",
f"rewriting = {total_graph_rewrite_time:6.3f}s, linking = {total_time_linker:6.3f}s ",
),
file=destination_file,
)
......@@ -186,7 +186,7 @@ def register_profiler_printer(fct):
class ProfileStats:
"""
Object to store runtime and memory profiling information for all of
Aesara's operations: compilation, optimization, execution.
Aesara's operations: compilation, rewriting, execution.
Parameters
----------
......@@ -220,7 +220,7 @@ class ProfileStats:
compile_time: float = 0.0
# Total time spent in body of orig_function,
# dominated by graph optimization and compilation of C
# dominated by graph rewriting and compilation of C
#
fct_call_time: float = 0.0
......@@ -259,12 +259,12 @@ class ProfileStats:
# Variable -> offset
#
optimizer_time: float = 0.0
# time spent optimizing graph (FunctionMaker.__init__)
rewriting_time: float = 0.0
# time spent rewriting graph (FunctionMaker.__init__)
validate_time: float = 0.0
# time spent in fgraph.validate
# This is a subset of optimizer_time that is dominated by toposort()
# This is a subset of rewriting_time that is dominated by toposort()
# when the destroy_map feature is included.
linker_time: float = 0.0
......@@ -284,8 +284,8 @@ class ProfileStats:
# case we print the profile when the function wasn't executed, or if there
# is a lazy operation in the graph.
optimizer_profile = None
# None or tuple (the optimizer, the profile it returned)
rewriter_profile = None
# None or tuple (the rewriter, the profile it returned)
# param is called flag_time_thunks because most other attributes with time
# in the name are times *of* something, rather than configuration flags.
......@@ -801,9 +801,9 @@ class ProfileStats:
f" Time in thunks: {local_time}s ({100 * local_time / self.fct_call_time:.3f}%)",
file=file,
)
print(f" Total compile time: {self.compile_time:e}s", file=file)
print(f" Total compilation time: {self.compile_time:e}s", file=file)
print(f" Number of Apply nodes: {int(self.nb_nodes)}", file=file)
print(f" Aesara Optimizer time: {self.optimizer_time:e}s", file=file)
print(f" Aesara rewrite time: {self.rewriting_time:e}s", file=file)
print(f" Aesara validate time: {self.validate_time:e}s", file=file)
print(
(
......@@ -823,9 +823,8 @@ class ProfileStats:
print(f" Node {node} time {t:e}s", file=file)
print("", file=file)
# The validation time is a subset of optimizer_time
if self.optimizer_time > 0:
assert self.validate_time < self.optimizer_time
if self.rewriting_time > 0:
assert self.validate_time < self.rewriting_time
def summary_globals(self, file):
print(
......@@ -1468,10 +1467,10 @@ class ProfileStats:
aesara.printing.debugprint(fcts, print_type=True)
if self.variable_shape or self.variable_strides:
self.summary_memory(file, n_apply_to_print)
if self.optimizer_profile:
print("Optimizer Profile", file=file)
print("-----------------", file=file)
self.optimizer_profile[0].print_profile(file, self.optimizer_profile[1])
if self.rewriter_profile:
print("Rewriter Profile", file=file)
print("----------------", file=file)
self.rewriter_profile[0].print_profile(file, self.rewriter_profile[1])
self.print_extra(file)
self.print_tips(file)
......@@ -1619,7 +1618,7 @@ class ProfileStats:
):
print(
(
" - You have a dot operation that was not optimized to"
" - You have a dot operation that was not rewritten to"
" dot22 (which is faster). Make sure the inputs are "
"float32 or float64, and are the same for both inputs. "
f"Currently they are: {[i.type for i in node.inputs]}"
......
......@@ -603,13 +603,13 @@ class ReplaceValidate(History, Validator):
fgraph.revert(chk)
if verbose:
print(
f"optimizer: validate failed on node {r}.\n Reason: {reason}, {e}"
f"rewriting: validate failed on node {r}.\n Reason: {reason}, {e}"
)
raise
if verbose:
print(
f"optimizer: rewrite {reason} replaces {r} of {r.owner} with {new_r} of {new_r.owner}"
f"rewriting: rewrite {reason} replaces {r} of {r.owner} with {new_r} of {new_r.owner}"
)
# The return is needed by replace_all_validate_remove
......
......@@ -481,7 +481,7 @@ class FunctionGraph(MetaObject):
verbose = config.optimizer_verbose
if verbose:
print(
f"optimizer: rewrite {reason} replaces {var} of {var.owner} with {new_var} of {new_var.owner}"
f"rewriting: rewrite {reason} replaces {var} of {var.owner} with {new_var} of {new_var.owner}"
)
new_var = var.type.filter_variable(new_var, allow_convert=True)
......
差异被折叠。
差异被折叠。
......@@ -177,10 +177,10 @@ class InplaceElemwiseOptimizer(GraphRewriter):
fgraph.attach_feature(DestroyHandler())
@staticmethod
def print_profile(stream, prof, level=0):
@classmethod
def print_profile(cls, stream, prof, level=0):
blanc = " " * level
print(blanc, "InplaceElemwiseOptimizer ", prof["opt"].op, file=stream)
print(blanc, cls.__name__, prof["opt"].op, file=stream)
for k in [
"node_before",
"nb_call_replace",
......@@ -3164,10 +3164,10 @@ class FusionOptimizer(GraphRewriter):
time_toposort,
)
@staticmethod
def print_profile(stream, prof, level=0):
@classmethod
def print_profile(cls, stream, prof, level=0):
blanc = " " * level
print(blanc, "FusionOptimizer", file=stream)
print(blanc, cls.__name__, file=stream)
print(blanc, " nb_iter", prof[1], file=stream)
print(blanc, " nb_replacement", prof[2], file=stream)
print(blanc, " nb_inconsistency_replace", prof[3], file=stream)
......
......@@ -1618,10 +1618,10 @@ class GemmOptimizer(GraphRewriter):
callbacks_time,
)
@staticmethod
def print_profile(stream, prof, level=0):
@classmethod
def print_profile(cls, stream, prof, level=0):
blanc = " " * level
print(blanc, "GemmOptimizer", file=stream)
print(blanc, cls.__name__, file=stream)
print(blanc, " nb_iter", prof[1], file=stream)
print(blanc, " nb_replacement", prof[2], file=stream)
print(blanc, " nb_replacement_didn_t_remove", prof[3], file=stream)
......
差异被折叠。
......@@ -2763,13 +2763,9 @@ class AbstractConv3d(AbstractConv):
class AbstractConv_gradWeights(BaseAbstractConv):
"""Gradient wrt. filters for `AbstractConv`.
Refer to :func:`BaseAbstractConv <aesara.tensor.nnet.abstract_conv.BaseAbstractConv>`
for a more detailed documentation.
"""Gradient with respect to filters for `AbstractConv`.
:note: You will not want to use this directly, but rely on
Aesara's automatic differentiation or graph optimization to
use it as needed.
Refer to :class:`BaseAbstractConv` for more detailed documentation.
"""
......@@ -2991,13 +2987,9 @@ class AbstractConv_gradWeights(BaseAbstractConv):
class AbstractConv2d_gradWeights(AbstractConv_gradWeights):
"""Gradient wrt. filters for `AbstractConv2d`.
Refer to :func:`BaseAbstractConv <aesara.tensor.nnet.abstract_conv.BaseAbstractConv>`
for a more detailed documentation.
"""Gradient with respect to filters for `AbstractConv2d`.
:note: You will not want to use this directly, but rely on
Aesara's automatic differentiation or graph optimization to
use it as needed.
Refer to :class:`BaseAbstractConv` for more detailed documentation.
"""
......@@ -3058,13 +3050,9 @@ class AbstractConv2d_gradWeights(AbstractConv_gradWeights):
class AbstractConv3d_gradWeights(AbstractConv_gradWeights):
"""Gradient wrt. filters for `AbstractConv3d`.
Refer to :func:`BaseAbstractConv <aesara.tensor.nnet.abstract_conv.BaseAbstractConv>`
for a more detailed documentation.
"""Gradient with respect to filters for `AbstractConv3d`.
:note: You will not want to use this directly, but rely on
Aesara's automatic differentiation or graph optimization to
use it as needed.
Refer to :class:`BaseAbstractConv` for more detailed documentation.
"""
......@@ -3121,13 +3109,9 @@ class AbstractConv3d_gradWeights(AbstractConv_gradWeights):
class AbstractConv_gradInputs(BaseAbstractConv):
"""Gradient wrt. inputs for `AbstractConv`.
Refer to :func:`BaseAbstractConv <aesara.tensor.nnet.abstract_conv.BaseAbstractConv>`
for a more detailed documentation.
"""Gradient with respect to inputs for `AbstractConv`.
:note: You will not want to use this directly, but rely on
Aesara's automatic differentiation or graph optimization to
use it as needed.
Refer to :class:`BaseAbstractConv` for more detailed documentation.
"""
......@@ -3373,13 +3357,9 @@ class AbstractConv_gradInputs(BaseAbstractConv):
class AbstractConv2d_gradInputs(AbstractConv_gradInputs):
"""Gradient wrt. inputs for `AbstractConv2d`.
Refer to :func:`BaseAbstractConv <aesara.tensor.nnet.abstract_conv.BaseAbstractConv>`
for a more detailed documentation.
"""Gradient with respect to inputs for `AbstractConv2d`.
:note: You will not want to use this directly, but rely on
Aesara's automatic differentiation or graph optimization to
use it as needed.
Refer to :class:`BaseAbstractConv` for more detailed documentation.
"""
......@@ -3440,13 +3420,9 @@ class AbstractConv2d_gradInputs(AbstractConv_gradInputs):
class AbstractConv3d_gradInputs(AbstractConv_gradInputs):
"""Gradient wrt. inputs for `AbstractConv3d`.
Refer to :func:`BaseAbstractConv <aesara.tensor.nnet.abstract_conv.BaseAbstractConv>`
for a more detailed documentation.
"""Gradient with respect to inputs for `AbstractConv3d`.
:note: You will not want to use this directly, but rely on
Aesara's automatic differentiation or graph optimization to
use it as needed.
Refer to :class:`BaseAbstractConv` for more detailed documentation.
"""
......
......@@ -1388,21 +1388,20 @@ def local_setsubtensor_of_constants(fgraph, node):
@register_specialize
@node_rewriter([AdvancedSubtensor1])
def local_adv_sub1_adv_inc_sub1(fgraph, node):
"""Optimize the possible AdvSub1(AdvSetSub1(...), ...).
"""Rewrite graphs like ``AdvancedSubtensor1(AdvancedSetSubtensor1(...), ...)``.
AdvancedSubtensor1(AdvancedSetSubtensor1(x, y, idx), idx) -> y
Notes
-----
This opt add AssertOp. Otherwise, it would remove shape and
index error. If you want to get rid of them, see the
:ref:`unsafe_optimization` section.
This rewrite adds an `AssertOp`; otherwise, it would remove shape and index
error. If you want to get rid of them, see the :ref:`unsafe_rewrites`
section.
WARNING:
A previous version of this optimization also matched
AdvancedSubtensor1(AdvancedIncSubtensor1(0s, y, idx), idx) -> y
A previous version of this rewrite also matched
``AdvancedSubtensor1(AdvancedIncSubtensor1(x, y, idx), idx)``.
This is incorrect when there are duplicate indices.
The current version warns the user about potential past issues.
The current version warns the user about potential issues.
"""
if not isinstance(node.op, AdvancedSubtensor1):
......
......@@ -5,26 +5,26 @@
Extending Aesara: FAQ and Troubleshooting
=========================================
I wrote a new Op/Type, and weird stuff is happening...
------------------------------------------------------
I wrote a new `Op`\/`Type`, and weird stuff is happening...
-----------------------------------------------------------
First, check the :ref:`op_contract` and the :ref:`type_contract`
and make sure you're following the rules.
Then try running your program in :ref:`using_debugmode`. DebugMode might catch
Then try running your program in :ref:`using_debugmode`. `DebugMode` might catch
something that you're not seeing.
I wrote a new optimization, but it's not getting used...
---------------------------------------------------------
I wrote a new rewrite, but it's not getting used...
---------------------------------------------------
Remember that you have to register optimizations with the :ref:`optdb`
Remember that you have to register rewrites with the :ref:`optdb`
for them to get used by the normal modes like FAST_COMPILE, FAST_RUN,
and DebugMode.
and `DebugMode`.
I wrote a new optimization, and it changed my results even though I'm pretty sure it is correct.
------------------------------------------------------------------------------------------------
I wrote a new rewrite, and it changed my results even though I'm pretty sure it is correct.
-------------------------------------------------------------------------------------------
First, check the :ref:`op_contract` and make sure you're following the rules.
Then try running your program in :ref:`using_debugmode`. DebugMode might
Then try running your program in :ref:`using_debugmode`. `DebugMode` might
catch something that you're not seeing.
......@@ -205,7 +205,7 @@ structures, code going like ``def f(x): ...`` would produce an :class:`Op` for
A :class:`Type` in Aesara provides static information (or constraints) about
data objects in a graph. The information provided by :class:`Type`\s allows
Aesara to perform optimizations and produce more efficient compiled code.
Aesara to perform rewrites and produce more efficient compiled code.
Every symbolic :class:`Variable` in an Aesara graph has an associated
:class:`Type` instance, and :class:`Type`\s also serve as a means of
......@@ -306,7 +306,7 @@ When used in a computation graph as the input of an
will *always* take the value contained in the :class:`Constant`'s data
field. Furthermore, it is assumed that the :class:`Op` will not under
any circumstances modify the input. This means that a :class:`Constant` is
eligible to participate in numerous optimizations: constant in-lining
eligible to participate in numerous rewrites: constant in-lining
in C code, constant folding, etc.
Automatic Differentiation
......@@ -327,26 +327,26 @@ gradient of the graph's output with respect to the graph's inputs.
A following section of this tutorial will examine the topic of
:ref:`differentiation<tutcomputinggrads>` in greater detail.
Optimizations
=============
Rewrites
========
When compiling an Aesara graph using :func:`aesara.function`, a graph is
necessarily provided. While this graph structure shows how to compute the
output from the input, it also offers the possibility to improve the way this
computation is carried out. The way optimizations work in Aesara is by
computation is carried out. The way rewrites work in Aesara is by
identifying and replacing certain patterns in the graph with other specialized
patterns that produce the same results but are either faster or more
stable. Optimizations can also detect identical subgraphs and ensure that the
stable. Rewrites can also detect identical subgraphs and ensure that the
same values are not computed twice.
For example, one (simple) optimization that Aesara uses is to replace
For example, one simple rewrite that Aesara uses is to replace
the pattern :math:`\frac{xy}{y}` by :math:`x`.
See :ref:`graph_rewriting` and :ref:`optimizations` for more information.
**Example**
Consider the following example of optimization:
Consider the following example of rewrites:
>>> import aesara
>>> a = aesara.tensor.vector("a") # declare symbolic variable
......@@ -354,13 +354,13 @@ Consider the following example of optimization:
>>> f = aesara.function([a], b) # compile function
>>> print(f([0, 1, 2])) # prints `array([0,2,1026])`
[ 0. 2. 1026.]
>>> aesara.printing.pydotprint(b, outfile="./pics/symbolic_graph_unopt.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at ./pics/symbolic_graph_unopt.png
>>> aesara.printing.pydotprint(f, outfile="./pics/symbolic_graph_opt.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at ./pics/symbolic_graph_opt.png
>>> aesara.printing.pydotprint(b, outfile="./pics/symbolic_graph_no_rewrite.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at ./pics/symbolic_graph_no_rewrite.png
>>> aesara.printing.pydotprint(f, outfile="./pics/symbolic_graph_rewrite.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at ./pics/symbolic_graph_rewrite.png
We used :func:`aesara.printing.pydotprint` to visualize the optimized graph
(right), which is much more compact than the unoptimized graph (left).
We used :func:`aesara.printing.pydotprint` to visualize the rewritten graph
(right), which is much more compact than the un-rewritten graph (left).
.. |g1| image:: ./pics/symbolic_graph_unopt.png
:width: 500 px
......@@ -368,7 +368,7 @@ We used :func:`aesara.printing.pydotprint` to visualize the optimized graph
:width: 500 px
================================ ====================== ================================
Unoptimized graph Optimized graph
Un-rewritten graph Rewritten graph
================================ ====================== ================================
|g1| |g2|
================================ ====================== ================================
......@@ -6,14 +6,14 @@ Extending Aesara
================
This advanced tutorial is for users who want to extend Aesara with new :class:`Type`\s,
new Operations (:Class:`Op`\S), and new graph optimizations. This first page of the
tutorial mainly focuses on the Python implementation of an :Class:`Op` and then
new operations (i.e. :class:`Op`\s), and new graph rewrites. This first page of the
tutorial mainly focuses on the Python implementation of an :class:`Op` and then
proposes an overview of the most important methods that define an :class:`Op`.
The second page of the tutorial (:ref:`creating_a_c_op`) provides then
information on the C implementation of an :Class:`Op`. The rest of the tutorial
goes more in depth on advanced topics related to :Class:`Op`\s, such as how to write
efficient code for an :Class:`Op` and how to write an optimization to speed up the
execution of an :Class:`Op`.
information on the C implementation of an :class:`Op`. The rest of the tutorial
goes more in depth on advanced topics related to :class:`Op`\s, such as how to write
efficient code for an :class:`Op` and how to write a rewrite to speed up the
execution of an :class:`Op`.
Along the way, this tutorial also introduces many aspects of how Aesara works,
so it is also good for you if you are interested in getting more under the hood
......@@ -23,11 +23,11 @@ with Aesara itself.
Before tackling this more advanced presentation, it is highly recommended
to read the introductory :ref:`Tutorial<tutorial>`, especially the sections
that introduce the Aesara Graphs, as providing a novel Aesara :class:`Op` requires a
basic understanting of the Aesara Graphs.
that introduce the Aesara graphs, as providing a novel Aesara :class:`Op` requires a
basic understanding of the Aesara graphs.
See also the :ref:`dev_start_guide` for information regarding the
versioning framework, namely about *git* and *GitHub*, regarding the
versioning framework, namely about Git and GitHub, regarding the
development workflow and how to make a quality contribution.
.. toctree::
......
......@@ -5,11 +5,11 @@
Views and inplace operations
============================
Aesara allows the definition of ``Op``\s which return a :term:`view` on one
Aesara allows the definition of :class:`Op`\s which return a :term:`view` on one
of their inputs or operate :term:`inplace` on one or several
inputs. This allows more efficient operations on NumPy's ``ndarray``
inputs. This allows more efficient operations on NumPy's :class:`ndarray`
data type than would be possible otherwise.
However, in order to work correctly, these ``Op``\s need to
However, in order to work correctly, these :class:`Op`\s need to
implement an additional interface.
Aesara recognizes views and inplace operations specially. It ensures
......@@ -23,7 +23,7 @@ Views
A "view" on an object ``x`` is an object ``y`` which shares memory
with ``x`` in some way. In other words, changing ``x`` might also
change ``y`` and vice versa. For example, imagine a ``vector`` structure
change ``y`` and vice versa. For example, imagine a `vector` structure
which contains two fields: an integer length and a pointer to a memory
buffer. Suppose we have:
......@@ -44,9 +44,9 @@ range ``0xDEADBEFF - 0xDEADBFDF`` and z the range ``0xCAFEBABE -
0xCAFEBBBE``. Since the ranges for ``x`` and ``y`` overlap, ``y`` is
considered to be a view of ``x`` and vice versa.
Suppose you had an ``Op`` which took ``x`` as input and returned
Suppose you had an :class:`Op` which took ``x`` as input and returned
``y``. You would need to tell Aesara that ``y`` is a view of ``x``. For this
purpose, you would set the ``view_map`` field as follows:
purpose, you would set the :class:`Op.view_map` field as follows:
.. testsetup::
......@@ -88,15 +88,15 @@ Inplace operations
An inplace operation is one that modifies one or more of its
inputs. For example, the expression ``x += y`` where ``x`` and ``y``
are ``numpy.ndarray`` instances would normally represent an inplace
are :class:`numpy.ndarray` instances would normally represent an inplace
operation on ``x``.
.. note::
Inplace operations in Aesara still work in a functional setting:
they need to return the modified input. Symbolically, Aesara
requires one Variable standing for the input *before* being modified
and *another* Variable representing the input *after* being
requires one :class:`Variable` standing for the input before being modified
and another :class:`Variable` representing the input after being
modified. Therefore, code using inplace operations would look like
this:
......@@ -121,29 +121,29 @@ operation on ``x``.
Needless to say, this goes for user-defined inplace operations as
well; the modified input must figure in the list of outputs you
give to ``Apply`` in the definition of ``make_node``.
give to :class:`Apply` in the definition of :meth:`Apply.make_node`.
Also, for technical reasons but also because they are slightly
confusing to use as evidenced by the previous code, Aesara does not
allow the end user to use inplace operations by default. However,
it does allow *optimizations* to substitute them in in a later
it does allow rewrites to substitute them in in a later
phase. Therefore, typically, if you define an inplace operation,
you will define a pure equivalent and an optimization which
you will define a pure equivalent and a rewrite which
substitutes one for the other. Aesara will automatically verify if
it is possible to do so and will refuse the substitution if it
introduces inconsistencies.
Take the previous definitions of ``x``, ``y`` and ``z`` and suppose an ``Op`` which
Take the previous definitions of ``x``, ``y`` and ``z`` and suppose an :class:`Op` which
adds one to every byte of its input. If we give ``x`` as an input to
that ``Op``, it can either allocate a new buffer of the same size as ``x``
that :class:`Op`, it can either allocate a new buffer of the same size as ``x``
(that could be ``z``) and set that new buffer's bytes to the variable of
the addition. That would be a normal, :term:`pure` ``Op``. Alternatively,
it could add one to each byte *in* the buffer ``x``, therefore
changing it. That would be an inplace ``Op``.
the addition. That would be a normal, :term:`pure`\ :class:`Op`. Alternatively,
it could add one to each byte in the buffer ``x``, therefore
changing it. That would be an inplace :class:`Op`.
Aesara needs to be notified of this fact. The syntax is similar to
that of ``view_map``:
that of :attr:`Op.view_map`:
.. testcode::
......@@ -171,27 +171,27 @@ first input (position 0).
# unlike for views, the previous line is legal and supported
.. note::
``DestroyHandler`` provides a hackish means of specifying that a variable cannot be
:class:`DestroyHandler` provides a hackish means of specifying that a variable cannot be
"destroyed" by an in-place operation: ``var.tag.indestructible = True``.
Destructive Operations
======================
While some operations will operate inplace on their inputs, some might
simply destroy or corrupt them. For example, an ``Op`` could do temporary
simply destroy or corrupt them. For example, an :class:`Op` could do temporary
calculations right in its inputs. If that is the case, Aesara also
needs to be notified. The way to notify Aesara is to assume that some
output operated inplace on whatever inputs are changed or corrupted by
the ``Op`` (even if the output does not technically reuse any of the
the :class:`Op` (even if the output does not technically reuse any of the
input(s)'s memory). From there, go to the previous section.
.. warning::
Failure to correctly mark down views and inplace operations using
``view_map`` and ``destroy_map`` can lead to nasty bugs. In the
:attr:`Op.view_map` and :attr:`Op.destroy_map` can lead to nasty bugs. In the
absence of this information, Aesara might assume that it is safe to
execute an inplace operation on some inputs *before* doing other
calculations on the *previous* values of the inputs. For example,
execute an inplace operation on some inputs before doing other
calculations on the previous values of the inputs. For example,
in the code: ``y = log(x); x2 = add_inplace(x, z)`` it is
imperative to do the logarithm before the addition (because after
the addition, the original x that we wanted to take the logarithm
......@@ -199,25 +199,28 @@ input(s)'s memory). From there, go to the previous section.
the value of ``x`` it might invert the order and that will
certainly lead to erroneous computations.
You can often identify an incorrect ``view_map`` or ``destroy_map``
by using :ref:`DebugMode`. *Be sure to use ``DebugMode`` when developing
a new ``Op`` that uses ``view_map`` and/or ``destroy_map``.*
You can often identify an incorrect :attr:`Op.view_map` or :attr:`Op.destroy_map`
by using :ref:`DebugMode`.
Inplace optimization and DebugMode
==================================
.. note::
Consider using :class:`DebugMode` when developing
a new :class:`Op` that uses :attr:`Op.view_map` and/or :attr:`Op.destroy_map`.
Inplace Rewriting and `DebugMode`
=================================
It is recommended that during the graph construction, all ``Op``\s are not inplace.
Then an optimization replaces them with inplace ones. Currently ``DebugMode`` checks
all optimizations that were tried even if they got rejected. One reason an inplace
optimization can get rejected is when there is another ``Op`` that is already being applied
inplace on the same input. Another reason to reject an inplace optimization is
It is recommended that during the graph construction, all :class:`Op`\s are not inplace.
Then a rewrite replaces them with inplace ones. Currently :class:`DebugMode` checks
all rewrites that were tried even if they got rejected. One reason an inplace
rewrite can get rejected is when there is another :class:`Op` that is already being applied
inplace on the same input. Another reason to reject an inplace rewrite is
if it would introduce a cycle into the graph.
The problem with ``DebugMode`` is that it will trigger a useless error when
checking a rejected inplace optimization, since it will lead to wrong results.
In order to be able to use ``DebugMode`` in more situations, your inplace
optimization can pre-check whether it will get rejected by using the
``aesara.graph.destroyhandler.fast_inplace_check()`` function, that will tell
which ``Op``\s can be performed inplace. You may then skip the optimization if it is
incompatible with this check. Note however that this check does not cover all
cases where an optimization may be rejected (it will not detect cycles).
The problem with `DebugMode` is that it will trigger a useless error when
checking a rejected inplace rewrite, since it will lead to wrong results.
In order to be able to use `DebugMode` in more situations, your inplace
rewrite can pre-check whether it will get rejected by using the
:func:`aesara.graph.destroyhandler.fast_inplace_check` function, that will tell
which :class:`Op`\s can be performed inplace. You may then skip the rewrite if it is
incompatible with this check. Note, however, that this check does not cover all
cases where a rewrite may be rejected (it will not detect cycles).
......@@ -77,12 +77,12 @@ It has to define the following methods.
``other`` is also an :class:`Op`.
Returning ``True`` here is a promise to the optimization system
Returning ``True`` here is a promise to the rewrite system
that the other :class:`Op` will produce exactly the same graph effects
(from perform) as this one, given identical inputs. This means it
(e.g. from its :meth:`Op.perform`) as this one, given identical inputs. This means it
will produce the same output values, it will destroy the same
inputs (same ``destroy_map``), and will alias outputs to the same
inputs (same ``view_map``). For more details, see
inputs (same :attr:`Op.destroy_map`), and will alias outputs to the same
inputs (same :attr:`Op.view_map`). For more details, see
:ref:`views_and_inplace`.
.. note::
......@@ -99,9 +99,9 @@ It has to define the following methods.
lifetime of self. :class:`Op` instances should be immutable in this
sense.
.. note::
.. note::
If you set `__props__`, this will be automatically generated.
If you set :attr:`Op.__props__`, this will be automatically generated.
.. op_optional:
......@@ -110,7 +110,7 @@ Optional methods or attributes
.. attribute:: __props__
*Default:* Undefined
Default: Undefined
Must be a tuple. Lists the name of the attributes which influence
the computation performed. This will also enable the automatic
......@@ -122,7 +122,7 @@ Optional methods or attributes
.. attribute:: default_output
*Default:* None
Default: None
If this member variable is an integer, then the default
implementation of ``__call__`` will return
......@@ -177,7 +177,7 @@ Optional methods or attributes
.. function:: infer_shape(fgraph, node, shapes)
This function is needed for shape optimization. ``shapes`` is a
This function is needed for shape rewrites. ``shapes`` is a
list with one tuple for each input of the :class:`Apply` node (which corresponds
to the inputs of the :class:`Op`). Each tuple contains as many elements as the
number of dimensions of the corresponding input. The value of each element
......@@ -216,9 +216,9 @@ Optional methods or attributes
.. function:: do_constant_folding(fgraph, node)
*Default:* Return True
Default: Return ``True``
By default when optimizations are enabled, we remove during
By default when rewrites are enabled, we remove during
function compilation :class:`Apply` nodes whose inputs are all constants.
We replace the :class:`Apply` node with an Aesara constant variable.
This way, the :class:`Apply` node is not executed at each function
......
......@@ -35,21 +35,20 @@ Some relevant :ref:`Features <libdoc_graph_fgraphfeature>` are typically added t
rewrites from operating in-place on inputs declared as immutable.
Step 2 - Perform graph optimizations
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Step 2 - Perform graph rewrites
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Once the :class:`FunctionGraph` is constructed, an :term:`optimizer` is produced by
the :term:`mode` passed to :func:`function` (the :class:`Mode` basically has two
important fields, :attr:`linker` and :attr:`optimizer`). That optimizer is
applied on the :class:`FunctionGraph` using its :meth:`Optimizer.optimize` method.
Once the :class:`FunctionGraph` is constructed, a :term:`rewriter` is produced by
the :term:`mode` passed to :func:`function`. That rewriter is
applied to the :class:`FunctionGraph` using its :meth:`GraphRewriter.rewrite` method.
The optimizer is typically obtained through :attr:`optdb`.
The rewriter is typically obtained through a query on :attr:`optdb`.
Step 3 - Execute linker to obtain a thunk
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Once the computation graph is optimized, the :term:`linker` is
Once the computation graph is rewritten, the :term:`linker` is
extracted from the :class:`Mode`. It is then called with the :class:`FunctionGraph` as
argument to produce a ``thunk``, which is a function with no arguments that
returns nothing. Along with the thunk, one list of input containers (a
......@@ -61,9 +60,9 @@ the inputs must be placed in the input containers, the thunk must be
called, and the outputs must be retrieved from the output containers
where the thunk put them.
Typically, the linker calls the ``toposort`` method in order to obtain
Typically, the linker calls the :meth:`FunctionGraph.toposort` method in order to obtain
a linear sequence of operations to perform. How they are linked
together depends on the Linker used. The :class:`CLinker` produces a single
together depends on the :class:`Linker` class used. For example, the :class:`CLinker` produces a single
block of C code for the whole computation, whereas the :class:`OpWiseCLinker`
produces one thunk for each individual operation and calls them in
sequence.
......
......@@ -36,7 +36,7 @@ The following sections assumes the reader is familiar with the following :
2. The interface and usage of Aesara's :ref:`scan <lib_scan>` function
Additionally, the :ref:`scan_internals_optimizations` section below assumes
Additionally, the :ref:`scan_internals_rewrites` section below assumes
knowledge of:
3. Aesara's :ref:`graph rewriting <graph_rewriting>`
......@@ -63,7 +63,7 @@ deal with, are :
* ``views.py`` contains different views of the `Scan` `Op` that have
simpler and easier signatures to be used in specific cases.
* ``opt.py`` contains the list of all Aesara graph optimizations for the
* ``opt.py`` contains the list of all Aesara graph rewrites for the
`Scan` operator.
......@@ -155,15 +155,15 @@ Multiply-recurrent multiple outputs (MITMOT) Initial values for
=========================================================== ======================================================= ============================================================ ============================================================= ========================================================= ======================================================
.. _scan_internals_optimizations:
.. _scan_internals_rewrites:
Optimizations
=============
Rewrites
========
`remove_constants_and_unused_inputs_scan`
-----------------------------------------
This optimization serves two purposes, The first is to remove a `Scan` `Op`'s
This rewrite serves two purposes. The first is to remove a :class:`Scan`\ `Op`'s
unused inputs. The second is to take a `Scan` `Op`'s constant inputs and remove
them, instead injecting the constants directly into the graph or the `Scan`
`Op`'s inner function. This will allow constant folding to happen inside the
......@@ -173,31 +173,31 @@ inner function.
`PushOutNonSeqScan`
-------------------
This optimizations pushes, out of `Scan`'s inner function and into the outer
function, computation that depends only on non-sequence inputs. Such
computation ends up being done every iteration on the same values so moving
it to the outer function to be executed only once, before the `Scan` `Op`,
reduces the amount of computation that needs to be performed.
This rewrite pushes sub-graphs that depend only on non-sequence inputs out of
`Scan`'s inner function and into the outer function. Such computation ends up
being done every iteration on the same values so moving it to the outer function
to be executed only once, before the `Scan`\ `Op`, reduces the amount of
computation that needs to be performed.
`PushOutSeqScan`
----------------
This optimization resembles `PushOutNonSeqScan` but it tries to push, out of
This rewrite resembles `PushOutNonSeqScan` but it tries to push, out of
the inner function, the computation that only relies on sequence and
non-sequence inputs. The idea behind this optimization is that, when it is
non-sequence inputs. The idea behind this rewrite is that, when it is
possible to do so, it is generally more computationally efficient to perform
a single operation on a large tensor rather than perform that same operation
many times on many smaller tensors. In many cases, this optimization can
many times on many smaller tensors. In many cases, this rewrite can
increase memory usage but, in some specific cases, it can also decrease it.
`PushOutScanOutput`
-------------------
This optimizations attempts to push out some of the computation at the end
This rewrite attempts to push out some of the computation at the end
of the inner function to the outer function, to be executed after the `Scan`
node. Like `PushOutSeqScan`, this optimization aims to replace many operations
node. Like `PushOutSeqScan`, this rewrite aims to replace many operations
on small tensors by few operations on large tensors. It can also lead to
increased memory usage.
......@@ -205,23 +205,23 @@ increased memory usage.
`PushOutDot1`
-------------
This is another optimization that attempts to detect certain patterns of
computation in a `Scan` `Op`'s inner function and move this computation to the
This is another rewrite that attempts to detect certain patterns of
computation in a `Scan`\ `Op`'s inner function and move this computation to the
outer graph.
`ScanInplaceOptimizer`
----------------------
This optimization attempts to make `Scan` compute its recurrent outputs inplace
on the input tensors that contain their initial states. This optimization can
This rewrite attempts to make `Scan` compute its recurrent outputs inplace
on the input tensors that contain their initial states. This rewrite can
improve runtime performance as well as reduce memory usage.
`ScanSaveMem`
-------------
This optimizations attempts to determine if a `Scan` node, during its execution,
This rewrite attempts to determine if a `Scan` node, during its execution,
for any of its outputs, can get away with allocating a memory buffer that is
large enough to contain some of the computed timesteps of that output but not
all of them.
......@@ -233,7 +233,7 @@ need to store the most recent ``N`` values, not all of them.
For instance, if a `Scan` node has a SITSOT output (last computed value is
fed back as an input at the next iteration) and only the last timestep of
that output is ever used in the outer function, the `ScanSaveMem` optimization
that output is ever used in the outer function, the `ScanSaveMem` rewrite
could determine that there is no need to store all computed timesteps for
that SITSOT output. Only the most recently computed timestep ever needs to
be kept in memory.
......@@ -242,11 +242,11 @@ be kept in memory.
`ScanMerge`
-----------
This optimization attempts to fuse distinct `Scan` `Op`s into a single `Scan` `Op`
that performs all the computation. The main advantage of merging `Scan` `Op`\s
together comes from the possibility of both original `Op`\s having some
This rewrite attempts to fuse distinct `Scan` nodes into a single `Scan` node
that performs all the computation. The main advantage of merging `Scan` nodes
together comes from the possibility of both original `Scan`\ `Op`\s having some
computation in common. In such a setting, this computation ends up being done
twice. The fused `Scan` `Op`, however, would only need to do it once and could
twice. The fused `Scan` node, however, would only need to do it once and could
therefore be more computationally efficient. Also, since every `Scan` node
involves a certain overhead, at runtime, reducing the number of `Scan` nodes in
the graph can improve performance.
......@@ -255,7 +255,7 @@ the graph can improve performance.
`scan_merge_inouts`
-------------------
This optimization attempts to merge a `Scan` `Op`'s identical outer inputs as well
This rewrite attempts to merge a `Scan` node's identical outer inputs as well
as merge its identical outer outputs (outputs that perform the same
computation on the same inputs). This can reduce the amount of computation as
well as result in a simpler graph for both the inner function and the outer
......@@ -267,7 +267,7 @@ Helper classes and functions
Because of the complexity involved in dealing with `Scan`, a large number of
helper classes and functions have been developed over time to implement
operations commonly needed when dealing with the `Scan` `Op`. The `Scan` `Op`
operations commonly needed when dealing with the `Scan`\ `Op`. The `Scan`\ `Op`
itself defines a large number of them and others can be found in the file
``utils.py``. This sections aims to point out the most useful ones sorted
by usage.
......
......@@ -25,7 +25,7 @@ simple function:
def sum_square_difference(a, b):
return at.sum((a - b)**2)
Even without taking Aesara's optimizations into account, it is likely
Even without taking Aesara's rewrites into account, it is likely
to work just as well as a custom implementation. It also supports all
data types, tensors of all dimensions as well as broadcasting, whereas
a custom implementation would probably only bother to support
......
......@@ -5,7 +5,7 @@
===============
The :class:`Type` class is used to provide "static" information about the types of
:class:`Variable`\s in an Aesara graph. This information is used for graph optimizations
:class:`Variable`\s in an Aesara graph. This information is used for graph rewrites
and compilation to languages with typing that's stricter than Python's.
The types handled by Aesara naturally overlap a lot with NumPy, but
......@@ -311,7 +311,7 @@ default values.
Optional. Only needed to profile the memory of this :class:`Type` of object.
:param shape_info: the output of the call to get_shape_info()
:param shape_info: the output of the call to `get_shape_info`
:return: the number of bytes taken by the object described by
``shape_info``.
......@@ -324,8 +324,8 @@ For certain mechanisms, you can register functions and other such
things to plus your type into aesara's mechanisms. These are optional
but will allow people to use you type with familiar interfaces.
`transfer()`
~~~~~~~~~~~~
`transfer`
~~~~~~~~~~
To plug in additional options for the transfer target, define a
function which takes an Aesara variable and a target argument and
......@@ -388,7 +388,7 @@ when ``allow_downcast`` is False, i.e. no precision loss is allowed.
The second method we define is ``values_eq_approx``. This method
allows approximate comparison between two values respecting our :class:`Type`'s
constraints. It might happen that an optimization changes the computation
constraints. It might happen that a rewrite changes the computation
graph in such a way that it produces slightly different variables, for
example because of numerical instability like rounding errors at the
end of the mantissa. For instance, ``a + a + a + a + a + a`` might not
......
......@@ -13,7 +13,7 @@ stressed enough!
Unit Testing revolves around the following principles:
* ensuring correctness: making sure that your :class:`Op`, :class:`Type` or
optimization works in the way you intended it to work. It is important for
rewrite works in the way you intended it to work. It is important for
this testing to be as thorough as possible: test not only the obvious cases,
but more importantly the corner cases which are more likely to trigger bugs
down the line.
......
......@@ -46,28 +46,28 @@ Faster Aesara Function Compilation
Aesara function compilation can be time consuming. It can be sped up by setting
the flag ``mode=FAST_COMPILE`` which instructs Aesara to skip most
optimizations and disables the generation of any c/cuda code. This is useful
rewrites and disables the generation of any C/CUDA code. This is useful
for quickly testing a simple idea.
If C code is necessary, the flag
``optimizer=fast_compile`` can be used instead. It instructs Aesara to
skip time consuming optimizations but still generate C code.
skip time consuming rewrites but still generate C code.
Similarly using the flag ``optimizer_excluding=inplace`` will speed up
compilation by preventing optimizations that replace operations with a
compilation by preventing rewrites that replace operations with a
version that reuses memory where it will not negatively impact the
integrity of the operation. Such optimizations can be time
integrity of the operation. Such rewrites can be time
consuming. However using this flag will result in greater memory usage
because space must be allocated for the results which would be
unnecessary otherwise. In short, using this flag will speed up
compilation but it will also use more memory because
``optimizer_excluding=inplace`` excludes inplace optimizations
``optimizer_excluding=inplace`` excludes inplace rewrites
resulting in a trade off between speed of compilation and memory
usage.
Alternatively, if the graph is big, using the flag ``cycle_detection=fast``
will speedup the computations by removing some of the inplace
optimizations. This would allow aesara to skip a time consuming cycle
rewrites. This would allow aesara to skip a time consuming cycle
detection algorithm. If the graph is big enough, we suggest that you use
this flag instead of ``optimizer_excluding=inplace``. It will result in a
computation time that is in between fast compile and fast run.
......@@ -82,23 +82,23 @@ garbage collection will keep all intermediate results' memory space to allow to
reuse them during the next call to the same Aesara function, if they are of the
correct shape. The shape could change if the shapes of the inputs change.
.. _unsafe_optimization:
.. _unsafe_rewrites:
Unsafe optimization
===================
Unsafe Rewrites
===============
Some Aesara optimizations make the assumption that the user inputs are
Some Aesara rewrites make the assumption that the user inputs are
valid. What this means is that if the user provides invalid values (like
incompatible shapes or indexing values that are out of bounds) and
the optimizations are applied, the user error will get lost. Most of the
the rewrites are applied, the user error will get lost. Most of the
time, the assumption is that the user inputs are valid. So it is good
to have the optimization being applied, but losing the error is bad.
The newest optimization in Aesara with such assumption will add an
to have the rewrite applied, but losing the error is bad.
The newest rewrite in Aesara with such an assumption will add an
assertion in the graph to keep the user error message. Computing
these assertions could take some time. If you are sure everything is valid
in your graph and want the fastest possible Aesara, you can enable an
optimization that will remove those assertions with:
in your graph and want the fastest possible Aesara, you can enable a
rewrite that will remove the assertions with:
``optimizer_including=local_remove_all_assert``
......
......@@ -68,13 +68,13 @@ Glossary
:term:`Type`, or read more about :ref:`graphstructures`.
Destructive
An :term:`Op` is destructive (of particular input[s]) if its
An :term:`Op` is destructive--of particular input(s)--if its
computation requires that one or more inputs be overwritten or
otherwise invalidated. For example, :term:`inplace`\ :class:`Op`\s are
destructive. Destructive :class:`Op`\s can sometimes be faster than
non-destructive alternatives. Aesara encourages users not to put
destructive :class:`Op`\s into graphs that are given to :term:`aesara.function`,
but instead to trust the optimizations to insert destructive ops
but instead to trust the rewrites to insert destructive :class:`Op`\s
judiciously.
Destructive :class:`Op`\s are indicated via a :attr:`Op.destroy_map` attribute. (See
......@@ -90,14 +90,16 @@ Glossary
every element, this is an inplace operation because when you are done,
the original input has been overwritten. :class:`Op`\s representing inplace
computations are :term:`destructive`, and by default these can only be
inserted by optimizations, not user code.
inserted by rewrites, not user code.
Linker
Part of a function :term:`Mode` -- an object responsible for 'running'
the compiled function. Among other things, the linker determines whether computations are carried out with C or Python code.
A :class:`Linker` instance responsible for "running" the compiled
function. Among other things, the linker determines whether
computations are carried out with
C or Python code.
Mode
An object providing an :term:`optimizer` and a :term:`linker` that is
A :class:`Mode` instance specifying an :term:`optimizer` and a :term:`linker` that is
passed to :term:`aesara.function`. It parametrizes how an expression
graph is converted to a callable object.
......@@ -120,12 +122,6 @@ Glossary
An instance of a :term:`rewriter` that has the capacity to provide
an improvement to the performance of a graph.
Optimization
A :term:`graph` transformation applied by an :term:`optimizer` during
the compilation of a :term:`graph` by :term:`aesara.function`. These
are graph rewrites that are intended to improve the performance of
a compiled :term:`Graph`.
Pure
An :term:`Op` is *pure* if it has no :term:`destructive` side-effects.
......
......@@ -2,14 +2,20 @@
Welcome
=======
Aesara is a Python library that allows you to define, optimize, and
evaluate mathematical expressions involving multi-dimensional
arrays efficiently. Aesara features:
* **Tight integration with NumPy** -- Use ``numpy.ndarray`` in Aesara-compiled functions.
* **Efficient symbolic differentiation** -- Aesara does your derivatives for functions with one or many inputs.
* **Speed and stability optimizations** -- Get the right answer for ``log(1+x)`` even when ``x`` is really tiny.
* **Dynamic C/JAX/Numba code generation** -- Evaluate expressions faster.
Aesara is a Python library that allows you to define, optimize/rewrite, and
evaluate mathematical expressions involving multi-dimensional arrays
efficiently.
Some of Aesara's features are:
* **Tight integration with NumPy**
- Use `numpy.ndarray` in Aesara-compiled functions
* **Efficient symbolic differentiation**
- Aesara efficiently computes your derivatives for functions with one or many inputs
* **Speed and stability optimizations**
- Get the right answer for ``log(1 + x)`` even when ``x`` is near zero
* **Dynamic C/JAX/Numba code generation**
- Evaluate expressions faster
Aesara is based on `Theano`_, which has been powering large-scale computationally
intensive scientific investigations since 2007.
......
......@@ -5,28 +5,28 @@
Aesara at a Glance
==================
Aesara is a Python library that lets you define, optimize, and evaluate
mathematical expressions, especially ones involving multi-dimensional arrays
(e.g. :class:`numpy.ndarray`\s). Using Aesara it is
possible to attain speeds rivaling hand-crafted C implementations for problems
involving large amounts of data.
Aesara is a Python library that allows one to define, optimize/rewrite, and
evaluate mathematical expressions, especially ones involving multi-dimensional
arrays (e.g. :class:`numpy.ndarray`\s). Using Aesara, it is possible to attain
speeds rivaling hand-crafted C implementations for problems involving large
amounts of data.
Aesara combines aspects of a computer algebra system (CAS) with aspects of an
optimizing compiler. It can also generate customized C code for many
mathematical operations. This combination of CAS with optimizing compilation
optimizing compiler. It can also generate customized code for multiple compiled
languages and/or their Python-based interfaces, such as C, Numba, and JAX. This
combination of CAS features with optimizing compilation and transpilation
is particularly useful for tasks in which complicated mathematical expressions
are evaluated repeatedly and evaluation speed is critical. For situations
where many different expressions are each evaluated once, Aesara can minimize
the amount of compilation/analysis overhead, but still provide symbolic
the amount of compilation and analysis overhead, but still provide symbolic
features such as automatic differentiation.
Aesara's compiler applies many optimizations of varying complexity to
these symbolic expressions. These optimizations include, but are not
limited to:
Aesara's compiler applies many default optimizations of varying
complexity. These optimizations include, but are not limited to:
* constant folding
* merging of similar subgraphs, to avoid redundant calculation
* arithmetic simplification (e.g. ``x*y/x -> y``, ``--x -> x``)
* merging of similar sub-graphs, to avoid redundant calculations
* arithmetic simplifications (e.g. ``x * y / x -> y``, ``-(-x) -> x``)
* inserting efficient BLAS_ operations (e.g. ``GEMM``) in a variety of
contexts
* using memory aliasing to avoid unnecessary calculations
......@@ -37,7 +37,7 @@ limited to:
For more information see :ref:`optimizations`.
Theano
-----------------
------
The library that Aesara is based on, Theano, was written at the LISA lab to support rapid development of efficient machine learning algorithms but while Theano was commonly referred to as a "deep learning" (DL) library, Aesara is not a DL library.
......
......@@ -181,7 +181,7 @@ Reference
and update the implicit function arguments according to the `updates`.
Inputs can be given as variables or In instances.
Inputs can be given as variables or :class:`In` instances.
:class:`In` instances also have a variable, but they attach some extra
information about how call-time arguments corresponding to that variable
should be used. Similarly, :class:`Out` instances can attach information
......@@ -189,28 +189,28 @@ Reference
The default is typically 'FAST_RUN' but this can be changed in
:doc:`aesara.config <../config>`. The mode
argument controls the sort of optimizations that will be applied to the
graph, and the way the optimized graph will be evaluated.
argument controls the sort of rewrites that will be applied to the
graph, and the way the rewritten graph will be evaluated.
After each function evaluation, the `updates` mechanism can replace the
value of any SharedVariable [implicit] inputs with new values computed
value of any (implicit) `SharedVariable` inputs with new values computed
from the expressions in the `updates` list. An exception will be raised
if you give two update expressions for the same SharedVariable input (that
if you give two update expressions for the same `SharedVariable` input (that
doesn't make sense).
If a SharedVariable is not given an update expression, but has a
``default_update`` member containing an expression, this expression
If a `SharedVariable` is not given an update expression, but has a
:attr:`Variable.default_update` member containing an expression, this expression
will be used as the update expression for this variable. Passing
``no_default_updates=True`` to ``function`` disables this behavior
entirely, passing ``no_default_updates=[sharedvar1, sharedvar2]``
disables it for the mentioned variables.
Regarding givens: Be careful to make sure that these substitutions are
independent, because behaviour when Var1 of one pair appears in the graph leading
to Var2 in another expression is undefined (e.g. with ``{a: x, b: a + 1}``).
Replacements specified with
givens are different from optimizations in that Var2 is not expected to be
equivalent to Var1.
independent, because behaviour when ``Var1`` of one pair appears in the graph leading
to ``Var2`` in another expression is undefined (e.g. with ``{a: x, b: a + 1}``).
Replacements specified with givens are different from replacements that
occur during normal rewriting, in that ``Var2`` is not expected to be
equivalent to ``Var1``.
.. autofunction:: aesara.compile.function.function_dump
......
......@@ -18,8 +18,8 @@ inputs-to-outputs graph is transformed into a callable object.
Aesara defines the following modes by name:
- ``'FAST_COMPILE'``: Apply just a few graph optimizations and only use Python implementations.
- ``'FAST_RUN'``: Apply all optimizations, and use C implementations where possible.
- ``'FAST_COMPILE'``: Apply just a few graph rewrites and only use Python implementations.
- ``'FAST_RUN'``: Apply all rewrites, and use C implementations where possible.
- ``'DebugMode'``: A mode for debugging. See :ref:`DebugMode <debugmode>` for details.
- ``'NanGuardMode'``: :ref:`NaN detector <nanguardmode>`
- ``'DEBUG_MODE'``: Deprecated. Use the string ``'DebugMode'`` instead.
......@@ -30,7 +30,7 @@ overridden by passing the keyword argument to :func:`aesara.function`.
.. TODO::
For a finer level of control over which optimizations are applied, and whether
For a finer level of control over which rewrites are applied, and whether
C or Python implementations are used, read.... what exactly?
......@@ -43,9 +43,9 @@ Reference
.. class:: Mode(object)
Compilation is controlled by two attributes: the `optimizer` controls how
an expression graph will be transformed; the `linker` controls how the
optimized expression graph will be evaluated.
Compilation is controlled by two attributes: the :attr:`optimizer` controls how
an expression graph will be transformed; the :attr:`linker` controls how the
rewritten expression graph will be evaluated.
.. attribute:: optimizer
......@@ -57,15 +57,15 @@ Reference
.. method:: including(*tags)
Return a new Mode instance like this one, but with an
optimizer modified by including the given tags.
Return a new :class:`Mode` instance like this one, but with its
:attr:`optimizer` modified by including the given tags.
.. method:: excluding(*tags)
Return a new Mode instance like this one, but with an
optimizer modified by excluding the given tags.
Return a new :class:`Mode` instance like this one, but with its
:attr:`optimizer` modified by excluding the given tags.
.. method:: requiring(*tags)
Return a new Mode instance like this one, but with an
optimizer modified by requiring the given tags.
Return a new :class:`Mode` instance like this one, but with its
:attr:`optimizer` modified by requiring the given tags.
......@@ -2,22 +2,22 @@
.. _opfromgraph:
===========
OpFromGraph
===========
============
`OpFromGraph`
============
This page describes :class:`aesara.compile.builders.OpFromGraph
<aesara.compile.builders.OpFromGraph>`, an Op that allows to
encapsulate an Aesara graph in an op.
<aesara.compile.builders.OpFromGraph>`, an `Op` constructor that allows one to
encapsulate an Aesara graph in a single `Op`.
This can be used to encapsulate some functionality in one block. It is
useful to scale Aesara compilation for regular bigger graphs when we
reuse that encapsulated functionality with different inputs many
times. Due to this encapsulation, it can make Aesara compilation phase
times. Due to this encapsulation, it can make Aesara's compilation phase
faster for graphs with many nodes.
Using this for small graphs is not recommended as it disables
optimizations between what is inside the encapsulation and outside of it.
rewrites between what is inside the encapsulation and outside of it.
.. note:
......
......@@ -170,8 +170,8 @@ import ``aesara`` and print the config variable, as in:
Default: ``True``
This enables, or disables, an optimization in :class:`Scan` that tries to
pre-allocate memory for its outputs. Enabling the optimization can give a
This enables, or disables, a rewrite in :class:`Scan` that tries to
pre-allocate memory for its outputs. Enabling the rewrite can give a
significant speed up at the cost of slightly increased memory usage.
.. attribute:: config.scan__allow_gc
......@@ -202,10 +202,10 @@ import ``aesara`` and print the config variable, as in:
Default: ``off``
This is a flag for checking the stack trace during graph optimization.
This is a flag for checking stack traces during graph rewriting.
If :attr:`check_stack_trace` is set to ``off``, no check is performed on the
stack trace. If :attr:`check_stack_trace` is set to ``log`` or ``warn``, a
dummy stack trace is inserted that indicates which optimization inserted the
dummy stack trace is inserted that indicates which rewrite inserted the
variable that had an empty stack trace, but, when ``warn`` is set, a warning
is also printed.
If :attr:`check_stack_trace` is set to ``raise``, an exception is raised if a
......@@ -315,7 +315,7 @@ import ``aesara`` and print the config variable, as in:
Default: ``False``
When ``True``, the VM and CVM linkers profile the optimization phase when
When ``True``, the :class:`VM` and :class:`CVM` linkers profile the rewriting phase when
compiling an Aesara function. This only works when ``profile=True``.
.. attribute:: config.profiling__n_apply
......@@ -398,7 +398,7 @@ import ``aesara`` and print the config variable, as in:
Default: ``'fast_run'``
When the mode is ``'Mode'``, it sets the default optimizer used.
When the mode is ``'Mode'``, it sets the default rewrites used during compilation.
.. attribute:: on_opt_error
......@@ -406,8 +406,8 @@ import ``aesara`` and print the config variable, as in:
Default: ``'warn'``
When a crash occurs while trying to apply an optimization, either warn the
user and skip the optimization (i.e. ``'warn'``), raise the exception
When a crash occurs while trying to apply a rewrite, either warn the
user and skip the rewrite (i.e. ``'warn'``), raise the exception
(i.e. ``'raise'``), drop into the ``pdb`` debugger (i.e. ``'pdb'``), or
ignore it (i.e. ``'ignore'``).
We suggest never using ``'ignore'`` except during testing.
......@@ -503,9 +503,9 @@ import ``aesara`` and print the config variable, as in:
When ``True``, add asserts that highlight shape errors.
Without such asserts, the underlying optimization could hide errors in user
Without such asserts, the underlying rewrite could hide errors in user
code. Aesara adds the asserts only if it cannot infer that the shapes are
equivalent. When it can determine equivalence, this optimization does not
equivalent. When it can determine equivalence, this rewrite does not
introduce an assert.
Removing these asserts can speed up execution.
......@@ -653,11 +653,11 @@ import ``aesara`` and print the config variable, as in:
Default: ``""``
A list of optimizer tags that shouldn't be included in the default ``Mode``.
A list of rewriter tags that shouldn't be included in the default ``Mode``.
If multiple tags are provided, separate them by ``':'``.
For example, to remove the ``Elemwise`` in-place optimizations,
For example, to remove the ``Elemwise`` in-place rewrites,
use the flags: ``optimizer_excluding:inplace_opt``, where
``inplace_opt`` is the name of the optimization group.
``inplace_opt`` is the name of the rewrite group.
This flag's value cannot be modified during the program execution.
......@@ -665,7 +665,7 @@ import ``aesara`` and print the config variable, as in:
Default: ``""``
A list of optimizer tags to be included in the default ``Mode``.
A list of rewriter tags to be included in the default ``Mode``.
If multiple tags are provided, separate them by ``':'``.
This flag's value cannot be modified during the program execution.
......@@ -674,7 +674,7 @@ import ``aesara`` and print the config variable, as in:
Default: ``""``
A list of optimizer tags that are required for optimization in the default
A list of rewriter tags that are required for rewriting in the default
``Mode``.
If multiple tags are provided, separate them by ``':'``.
......@@ -686,7 +686,7 @@ import ``aesara`` and print the config variable, as in:
Default: ``False``
When ``True``, print the optimizations applied to stdout.
When ``True``, print the rewrites applied to stdout.
.. attribute:: nocleanup
......@@ -792,7 +792,7 @@ import ``aesara`` and print the config variable, as in:
Setting this attribute to something other than ``'off'`` activates a
debugging mechanism, for which Aesara executes the graph on-the-fly, as it
is being built. This allows the user to spot errors early on (such as
dimension mis-matches) **before** optimizations are applied.
dimension mis-matches) **before** rewrites are applied.
Aesara will execute the graph using constants and/or shared variables
provided by the user. Purely symbolic variables (e.g. ``x =
......@@ -809,8 +809,8 @@ import ``aesara`` and print the config variable, as in:
.. attribute:: compute_test_value_opt
As ``compute_test_value``, but it is the value used during Aesara's
optimization phase. This is used to help debug shape errors in Aesara's
optimizations.
rewriting phase. This is used to help debug shape errors in Aesara's
rewrites.
.. attribute:: print_test_value
......@@ -898,21 +898,21 @@ import ``aesara`` and print the config variable, as in:
Int value, default: 0
The verbosity level of the meta-optimizer: ``0`` for silent, ``1`` to only
warn when Aesara cannot meta-optimize an :class:`Op`, ``2`` for full output (e.g.
timings and the optimizations selected).
The verbosity level of the meta-rewriter: ``0`` for silent, ``1`` to only
warn when Aesara cannot meta-rewrite an :class:`Op`, ``2`` for full output (e.g.
timings and the rewrites selected).
.. attribute:: config.metaopt__optimizer_excluding
Default: ``""``
A list of optimizer tags that we don't want included in the meta-optimizer.
A list of rewrite tags that we don't want included in the meta-rewriter.
Multiple tags are separated by ``':'``.
.. attribute:: config.metaopt__optimizer_including
Default: ``""``
A list of optimizer tags to be included during meta-optimization.
A list of rewriter tags to be included during meta-rewriting.
Multiple tags are separated by ``':'``.
......@@ -33,7 +33,7 @@ hello world __str__ = [ 1. 2. 3.]
If you print more than one thing in a function like `f`, they will not
necessarily be printed in the order that you think. The order might even depend
on which graph optimizations are applied. Strictly speaking, the order of
on which graph rewrites are applied. Strictly speaking, the order of
printing is not completely defined by the interface --
the only hard rule is that if the input of some print output `a` is
ultimately used as an input to some other print input `b` (so that `b` depends on `a`),
......@@ -56,7 +56,7 @@ Aesara also provides :func:`aesara.printing.pydotprint` that creates a png image
>>> x = at.dscalar('x')
>>> y = x ** 2
>>> gy = grad(y, x)
>>> pp(gy) # print out the gradient prior to optimization
>>> pp(gy) # print out the gradient prior to rewriting
'((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'
>>> f = function([x], gy)
>>> pp(f.maker.fgraph.outputs[0])
......
......@@ -81,7 +81,7 @@ Scan returns a tuple containing our result (``result``) and a
dictionary of updates (empty in this case). Note that the result
is not a matrix, but a 3D tensor containing the value of ``A**k`` for
each step. We want the last value (after ``k`` steps) so we compile
a function to return just that. Note that there is an optimization, that
a function to return just that. Note that there is a rewrite that
at compile time will detect that you are using just the last value of the
result and ensure that scan does not store all the intermediate values
that are used. So do not worry if ``A`` and ``k`` are large.
......@@ -341,7 +341,7 @@ function applied at each step) you do not need to pass them as arguments.
Scan will find them on its own and add them to the graph.
However, passing them to the scan function is a good practice, as it avoids
Scan Op calling any earlier (external) Op over and over. This results in a
simpler computational graph, which speeds up the optimization and the
simpler computational graph, which speeds up the rewriting and the
execution. To pass the shared variables to Scan you need to put them in a list
and give it to the ``non_sequences`` argument. Here is the Gibbs sampling code
updated:
......@@ -381,7 +381,7 @@ Using shared variables - the strict flag
----------------------------------------
As we just saw, passing the shared variables to scan may result in a simpler
computational graph, which speeds up the optimization and the execution. A
computational graph, which speeds up the rewriting and the execution. A
good way to remember to pass every shared variable used during scan is to use
the ``strict`` flag. When set to true, scan checks that all the necessary shared
variables in ``fn`` are passed as explicit arguments to ``fn``. This has to be
......@@ -599,8 +599,8 @@ about 6x slower than the forward, a ~20% slowdown is expected. Apart from the
is similar to the classic ``scan`` function.
Optimizing Scan's performance
-----------------------------
Improving Scan's performance
----------------------------
This section covers some ways to improve performance of an Aesara function
using Scan.
......@@ -645,29 +645,29 @@ is not provided for this argument, the value of the flag
``config.scan__allow_gc`` is used).
Graph optimizations
^^^^^^^^^^^^^^^^^^^
Graph Rewrites
^^^^^^^^^^^^^^
This one is simple but still worth pointing out. Aesara is able to
automatically recognize and optimize many computation patterns. However, there
are patterns that Aesara doesn't optimize because doing so would change the
automatically recognize and rewrite many computation patterns. However, there
are patterns that Aesara doesn't rewrite because doing so would change the
user interface (such as merging shared variables together into a single one,
for instance). Additionally, Aesara doesn't catch every case that it could
optimize and so it remains useful for performance that the user defines an
rewrite and so it remains useful for performance that the user defines an
efficient graph in the first place. This is also the case, and sometimes even
more so, for the graph inside of Scan. This is because it will be executed
many times for every execution of the Aesara function that contains it.
The `LSTM tutorial <http://deeplearning.net/tutorial/lstm.html>`_ on
`DeepLearning.net <http://deeplearning.net>`_ provides an example of an
optimization that Aesara cannot perform. Instead of performing many matrix
`DeepLearning.net <http://deeplearning.net>`_ provides an example of a
rewrite that Aesara cannot perform. Instead of performing many matrix
multiplications between matrix :math:`x_t` and each of the shared matrices
:math:`W_i`, :math:`W_c`, :math:`W_f` and :math:`W_o`, the matrices
:math:`W_*`, are merged into a single shared matrix :math:`W` and the graph
performs a single larger matrix multiplication between :math:`W` and
:math:`x_t`. The resulting matrix is then sliced to obtain the results that
the small individual matrix multiplications would have produced. This
optimization replaces several small and inefficient matrix multiplications by
rewrite replaces several small and inefficient matrix multiplications by
a single larger one and thus improves performance at the cost of a potentially
higher memory usage.
......
......@@ -231,18 +231,18 @@ List of Implemented Operations
- :func:`sampling_dot <aesara.sparse.basic.sampling_dot>`.
- Both inputs must be dense.
- The grad implemented is structured for `p`.
- The grad implemented is structured for ``p``.
- Sample of the dot and sample of the gradient.
- C code for perform but not for grad.
- Returns sparse for perform and grad.
- :func:`usmm <aesara.sparse.basic.usmm>`.
- You *shouldn't* insert this op yourself!
- There is an optimization that transform a
:func:`dot <aesara.sparse.basic.dot>` to ``Usmm`` when possible.
- There is a rewrite that transforms a
:func:`dot <aesara.sparse.basic.dot>` to :class:`Usmm` when possible.
- This op is the equivalent of gemm for sparse dot.
- There is no grad implemented for this op.
- This :class:`Op` is the equivalent of gemm for sparse dot.
- There is no grad implemented for this :class:`Op`.
- One of the inputs must be sparse, the other sparse or dense.
- Returns a dense from perform.
......
......@@ -1199,7 +1199,7 @@ Bitwise
Inplace
-------
In-place operators are *not* supported. Aesara's graph-optimizations
In-place operators are *not* supported. Aesara's graph rewrites
will determine which intermediate values to use for in-place
computations. If you would like to update the value of a
:term:`shared variable`, consider using the ``updates`` argument to
......
===================================================================
:mod:`tensor.basic_opt` -- Tensor Optimizations
:mod:`tensor.basic_opt` -- Tensor Rewrites
===================================================================
.. module:: tensor.basic_opt
:platform: Unix, Windows
:synopsis: Tensor Optimizations
:synopsis: Tensor Rewrites
.. moduleauthor:: LISA, PyMC Developers, Aesara Developers
.. automodule:: aesara.tensor.basic_opt
......
===================================================================
:mod:`tensor.math_opt` -- Tensor Optimizations for Math Operations
===================================================================
==============================================================
:mod:`tensor.math_opt` -- Tensor Rewrites for Math Operations
==============================================================
.. module:: tensor.math_opt
:platform: Unix, Windows
:synopsis: Tensor Optimizations for Math Operations
:synopsis: Tensor Rewrites for Math Operations
.. moduleauthor:: LISA, PyMC Developers, Aesara Developers
.. automodule:: aesara.tensor.math_opt
......
......@@ -61,45 +61,44 @@
.. function:: ultra_fast_sigmoid(x)
Returns the *approximated* standard :func:`sigmoid` nonlinearity applied to x.
:Parameters: *x* - symbolic Tensor (or compatible)
:Return type: same as x
Returns an approximate standard :func:`sigmoid` nonlinearity applied to ``x``.
:Parameters: ``x`` - symbolic Tensor (or compatible)
:Return type: same as ``x``
:Returns: approximated element-wise sigmoid: :math:`sigmoid(x) = \frac{1}{1 + \exp(-x)}`.
:note: To automatically change all :func:`sigmoid` ops to this version, use
the Aesara optimization ``local_ultra_fast_sigmoid``. This can be done
:note: To automatically change all :func:`sigmoid`\ :class:`Op`\s to this version, use
the Aesara rewrite `local_ultra_fast_sigmoid`. This can be done
with the Aesara flag ``optimizer_including=local_ultra_fast_sigmoid``.
This optimization is done late, so it should not affect
stabilization optimization.
This rewrite is done late, so it should not affect stabilization rewrites.
.. note:: The underlying code will return 0.00247262315663 as the
minimum value and 0.997527376843 as the maximum value. So it
never returns 0 or 1.
.. note:: Using directly the ultra_fast_sigmoid in the graph will
disable stabilization optimization associated with it. But
using the optimization to insert them won't disable the
stability optimization.
.. note:: Using `ultra_fast_sigmoid` directly in the graph will
disable stabilization rewrites associated with it. But
using the rewrite to insert them won't disable the
stability rewrites.
.. function:: hard_sigmoid(x)
Returns the *approximated* standard :func:`sigmoid` nonlinearity applied to x.
:Parameters: *x* - symbolic Tensor (or compatible)
:Return type: same as x
Returns an approximate standard :func:`sigmoid` nonlinearity applied to ``x``.
:Parameters: ``x`` - symbolic Tensor (or compatible)
:Return type: same as ``x``
:Returns: approximated element-wise sigmoid: :math:`sigmoid(x) = \frac{1}{1 + \exp(-x)}`.
:note: To automatically change all :func:`sigmoid` ops to this version, use
the Aesara optimization ``local_hard_sigmoid``. This can be done
:note: To automatically change all :func:`sigmoid`\ :class:`Op`\s to this version, use
the Aesara rewrite `local_hard_sigmoid`. This can be done
with the Aesara flag ``optimizer_including=local_hard_sigmoid``.
This optimization is done late, so it should not affect
stabilization optimization.
This rewrite is done late, so it should not affect
stabilization rewrites.
.. note:: The underlying code will return an exact 0 or 1 if an
element of x is too small or too big.
element of ``x`` is too small or too big.
.. note:: Using directly the ultra_fast_sigmoid in the graph will
disable stabilization optimization associated with it. But
using the optimization to insert them won't disable the
stability optimization.
.. note:: Using `hard_sigmoid` directly in the graph will
disable stabilization rewrites associated with it. But
using the rewrite to insert them won't disable the
stability rewrites.
.. function:: softplus(x)
......
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论