Commit 2dc0af2f authored by Brandon T. Willard, committed by Brandon T. Willard

Change more uses of "optimize" terminology

Parent 45f48ae6
-"""
-Driver of graph construction, optimization, and linking.
-"""
+"""Objects that orchestrate graph construction, rewriting, and linking."""
 import copy
 import copyreg
@@ -753,9 +750,8 @@ class Function:
             # cause problems.
             on_unused_input="ignore",
             function_builder=maker.function_builder,
-            # As this is an optimized graph, it
-            # can contain inplace. DebugMode check
-            # that.
+            # As this is a rewritten graph, it can contain inplace. DebugMode
+            # checks that.
             accept_inplace=True,
             no_fgraph_prep=True,
         ).create(input_storage, storage_map=new_storage_map)
@@ -1182,7 +1178,7 @@ def insert_deepcopy(fgraph, wrapped_inputs, wrapped_outputs):
     This loop was inserted to remove aliasing between outputs when they all
     evaluate to the same value. Originally it was OK for outputs to be aliased,
     but some of the outputs can be shared variables, and is not good for shared
-    variables to be aliased. It might be possible to optimize this by making
+    variables to be aliased. It might be possible to rewrite this by making
     sure there is no aliasing only between shared variables.

     If some outputs are constant, we add deep copy to respect the memory
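The aliasing problem that `insert_deepcopy` guards against can be illustrated with plain Python lists (a sketch only; `compute_outputs` is a stand-in, not Aesara's API):

```python
import copy

def compute_outputs():
    # A hypothetical compiled function whose two outputs alias the
    # same underlying storage (they "evaluate to the same value").
    shared_storage = [1.0, 2.0, 3.0]
    return shared_storage, shared_storage

out1, out2 = compute_outputs()
out1[0] = 99.0
# Aliasing: mutating one output silently changes the other.
assert out2[0] == 99.0

# Inserting a deep copy on each output, as insert_deepcopy does,
# removes the aliasing.
a, b = (copy.deepcopy(o) for o in compute_outputs())
a[0] = 99.0
assert b[0] == 1.0  # the other output is unaffected
```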
@@ -1279,7 +1275,7 @@ class FunctionMaker:
     """
    `FunctionMaker` is the class to `create` `Function` instances.

-    This class has the fgraph, the optimizer, and the linker. When
+    This class has the fgraph, the rewriter, and the linker. When
     copying a `Function`, there is no need to duplicate the
     `FunctionMaker` instance. Deepcopy still copies both, which can
     variable in re-compilation.
@@ -1292,7 +1288,7 @@ class FunctionMaker:
         functions produced by FunctionMaker will return their output value
         directly.
     mode : Mode instance
-        Telling FunctionMaker how to optimize and link. None means to use the
+        Telling FunctionMaker how to rewrite and link. None means to use the
         `config.mode`.
     accept_inplace : bool
         True iff it is acceptable to have inplace operations in the graph from
@@ -1395,44 +1391,44 @@ class FunctionMaker:
     @staticmethod
     def prepare_fgraph(
-        inputs, outputs, additional_outputs, fgraph, optimizer, linker, profile
+        inputs, outputs, additional_outputs, fgraph, rewriter, linker, profile
     ):

         try:
-            start_optimizer = time.time()
+            start_rewriter = time.time()

-            optimizer_profile = None
-            opt_time = None
+            rewriter_profile = None
+            rewrite_time = None

             with config.change_flags(
                 compute_test_value=config.compute_test_value_opt,
                 traceback__limit=config.traceback__compile_limit,
             ):
-                optimizer_profile = optimizer(fgraph)
+                rewriter_profile = rewriter(fgraph)

-                end_optimizer = time.time()
-                opt_time = end_optimizer - start_optimizer
+                end_rewriter = time.time()
+                rewrite_time = end_rewriter - start_rewriter

-                _logger.debug(f"Optimizing took {opt_time:f} seconds")
+                _logger.debug(f"Rewriting took {rewrite_time:f} seconds")

                 # Add deep copy to respect the memory interface
                 insert_deepcopy(fgraph, inputs, outputs + additional_outputs)
         finally:

-            # If the optimizer got interrupted
-            if opt_time is None:
-                end_optimizer = time.time()
-                opt_time = end_optimizer - start_optimizer
+            # If the rewriter got interrupted
+            if rewrite_time is None:
+                end_rewriter = time.time()
+                rewrite_time = end_rewriter - start_rewriter

-            aesara.compile.profiling.total_graph_opt_time += opt_time
+            aesara.compile.profiling.total_graph_rewrite_time += rewrite_time

             if profile:
-                if optimizer_profile is None and hasattr(optimizer, "pre_profile"):
-                    optimizer_profile = optimizer.pre_profile
+                if rewriter_profile is None and hasattr(rewriter, "pre_profile"):
+                    rewriter_profile = rewriter.pre_profile

-                profile.optimizer_time += opt_time
+                profile.rewriting_time += rewrite_time

                 if config.profile_optimizer:
-                    profile.optimizer_profile = (optimizer, optimizer_profile)
+                    profile.rewriter_profile = (rewriter, rewriter_profile)
             elif config.profile_optimizer and profile is not False:
                 # If False, it means the profiling for that function was
                 # explicitly disabled
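The `try`/`finally` pattern in `prepare_fgraph`, which charges rewrite time to the global counter even when the rewriter is interrupted mid-run, can be sketched in isolation (names here are illustrative, not Aesara's):

```python
import time

def apply_with_timing(rewriter, graph, totals):
    """Apply `rewriter` to `graph`, recording elapsed time in `totals`
    even if the rewriter raises or is interrupted."""
    start = time.time()
    rewrite_time = None
    try:
        profile = rewriter(graph)
        rewrite_time = time.time() - start
        return profile
    finally:
        # If the rewriter got interrupted, fall back to measuring here.
        if rewrite_time is None:
            rewrite_time = time.time() - start
        totals["graph_rewrite_time"] = (
            totals.get("graph_rewrite_time", 0.0) + rewrite_time
        )

totals = {}
apply_with_timing(lambda g: None, object(), totals)

# The time is recorded even when the rewriter fails mid-way:
try:
    apply_with_timing(lambda g: 1 / 0, object(), totals)
except ZeroDivisionError:
    pass
assert totals["graph_rewrite_time"] >= 0.0
```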
@@ -1466,8 +1462,8 @@ class FunctionMaker:
     ):
         # Save the provided mode, not the instantiated mode.
         # The instantiated mode don't pickle and if we unpickle an Aesara
-        # function and it get re-compiled, we want the current optimizer to be
-        # used, not the optimizer when it was saved.
+        # function and it get re-compiled, we want the current rewriter to be
+        # used, not the rewriter when it was saved.
         self.mode = mode
         mode = aesara.compile.mode.get_mode(mode)
@@ -1478,7 +1474,7 @@ class FunctionMaker:
         if profile:
             # This is very important:
             # 1) We preload the cache here to not have its timing
-            #    included in optimization that compile function.
+            #    included with the rewrites.
             # 2) Do not refresh the cache here by default. It cause
             #    too much execution time during testing as we compile
             #    much more functions then the number of compile c
@@ -1515,11 +1511,11 @@ class FunctionMaker:
         self.fgraph = fgraph

-        optimizer, linker = mode.optimizer, copy.copy(mode.linker)
+        rewriter, linker = mode.optimizer, copy.copy(mode.linker)

         if not no_fgraph_prep:
             self.prepare_fgraph(
-                inputs, outputs, found_updates, fgraph, optimizer, linker, profile
+                inputs, outputs, found_updates, fgraph, rewriter, linker, profile
             )

         assert len(fgraph.outputs) == len(outputs + found_updates)
@@ -1715,7 +1711,7 @@ def orig_function(
         time spent in this function.
     accept_inplace : bool
         True iff the graph can contain inplace operations prior to the
-        optimization phase (default is False).
+        rewrite phase (default is False).
     profile : None or ProfileStats instance
     on_unused_input : {'raise', 'warn', 'ignore', None}
         What to do if a variable in the 'inputs' list is not used in the graph.
...
@@ -190,7 +190,7 @@ optdb.register(
 # The opt should not do anything that need shape inference.
 # New nodes that don't have infer_shape need that the original node
 # also don't have infer_shape
-local_useless = LocalGroupDB(apply_all_opts=True, profile=True)
+local_useless = LocalGroupDB(apply_all_rewrites=True, profile=True)
 optdb.register(
     "useless",
     TopoDB(local_useless, failure_callback=NodeProcessingGraphRewriter.warn_inplace),
@@ -276,20 +276,19 @@ del _tags
 class Mode:
-    """
-    The Mode represents a way to optimize and then link a computation graph.
+    """A class that specifies the rewrites/optimizations used during function compilation.

     Parameters
     ----------
-    optimizer: a structure of type Optimizer
+    optimizer
         An Optimizer may simplify the math, put similar computations together,
         improve numerical stability and various other improvements.
-    linker: a structure of type Linker
+    linker
         A Linker decides which implementations to use (C or Python, for example)
         and how to string them together to perform the computation.
-    db:
-        The ``RewriteDatabase`` used by this ``Mode``. Note: This value
-        is *not* part of a ``Mode`` instance's pickled state.
+    db
+        The `RewriteDatabase` used by this `Mode`. Note: This value
+        is *not* part of a `Mode` instance's pickled state.

     See Also
     --------
...
@@ -45,7 +45,7 @@ logger = logging.getLogger("aesara.compile.profiling")
 aesara_imported_time: float = time.time()
 total_fct_exec_time: float = 0.0
-total_graph_opt_time: float = 0.0
+total_graph_rewrite_time: float = 0.0
 total_time_linker: float = 0.0

 _atexit_print_list: List["ProfileStats"] = []
@@ -97,7 +97,7 @@ def _atexit_print_fn():
         "fct_call_time",
         "fct_callcount",
         "vm_call_time",
-        "optimizer_time",
+        "rewriter_time",
         "linker_time",
         "validate_time",
         "import_time",
@@ -120,18 +120,18 @@ def _atexit_print_fn():
             assert key not in cum_attr, (key, cum_attr)
             cum_attr[key] = val

-        if cum.optimizer_profile and ps.optimizer_profile:
+        if cum.rewriter_profile and ps.rewriter_profile:
             try:
-                merge = cum.optimizer_profile[0].merge_profile(
-                    cum.optimizer_profile[1], ps.optimizer_profile[1]
+                merge = cum.rewriter_profile[0].merge_profile(
+                    cum.rewriter_profile[1], ps.rewriter_profile[1]
                 )
-                assert len(merge) == len(cum.optimizer_profile[1])
-                cum.optimizer_profile = (cum.optimizer_profile[0], merge)
+                assert len(merge) == len(cum.rewriter_profile[1])
+                cum.rewriter_profile = (cum.rewriter_profile[0], merge)
             except Exception as e:
                 print(e)
-                cum.optimizer_profile = None
+                cum.rewriter_profile = None
         else:
-            cum.optimizer_profile = None
+            cum.rewriter_profile = None
     cum.summary(
         file=destination_file,
@@ -149,7 +149,7 @@ def print_global_stats():
     -- Time elapsed since Aesara was imported
     -- Time spent inside Aesara functions
     -- Time spent in compiling Aesara functions
-       -- on graph optimization
+       -- on graph rewriters
        -- on linker
     """
@@ -168,7 +168,7 @@ def print_global_stats():
             f"Time elasped since Aesara import = {time.time() - aesara_imported_time:6.3f}s, "
             f"Time spent in Aesara functions = {total_fct_exec_time:6.3f}s, "
             "Time spent compiling Aesara functions: "
-            f" optimization = {total_graph_opt_time:6.3f}s, linker = {total_time_linker:6.3f}s ",
+            f"rewriting = {total_graph_rewrite_time:6.3f}s, linking = {total_time_linker:6.3f}s ",
         ),
         file=destination_file,
     )
@@ -186,7 +186,7 @@ def register_profiler_printer(fct):
 class ProfileStats:
     """
     Object to store runtime and memory profiling information for all of
-    Aesara's operations: compilation, optimization, execution.
+    Aesara's operations: compilation, rewriting, execution.

     Parameters
     ----------
@@ -220,7 +220,7 @@ class ProfileStats:
     compile_time: float = 0.0
     # Total time spent in body of orig_function,
-    # dominated by graph optimization and compilation of C
+    # dominated by graph rewriting and compilation of C
     #

     fct_call_time: float = 0.0
@@ -259,12 +259,12 @@ class ProfileStats:
     # Variable -> offset
     #

-    optimizer_time: float = 0.0
-    # time spent optimizing graph (FunctionMaker.__init__)
+    rewriting_time: float = 0.0
+    # time spent rewriting graph (FunctionMaker.__init__)

     validate_time: float = 0.0
     # time spent in fgraph.validate
-    # This is a subset of optimizer_time that is dominated by toposort()
+    # This is a subset of rewriting_time that is dominated by toposort()
     # when the destorymap feature is included.

     linker_time: float = 0.0
@@ -284,8 +284,8 @@ class ProfileStats:
     # case we print the profile when the function wasn't executed, or if there
     # is a lazy operation in the graph.

-    optimizer_profile = None
-    # None or tuple (the optimizer, the profile it returned)
+    rewriter_profile = None
+    # None or tuple (the rewriter, the profile it returned)

     # param is called flag_time_thunks because most other attributes with time
     # in the name are times *of* something, rather than configuration flags.
@@ -801,9 +801,9 @@ class ProfileStats:
             f" Time in thunks: {local_time}s ({100 * local_time / self.fct_call_time:.3f}%)",
             file=file,
         )
-        print(f" Total compile time: {self.compile_time:e}s", file=file)
+        print(f" Total compilation time: {self.compile_time:e}s", file=file)
         print(f" Number of Apply nodes: {int(self.nb_nodes)}", file=file)
-        print(f" Aesara Optimizer time: {self.optimizer_time:e}s", file=file)
+        print(f" Aesara rewrite time: {self.rewriting_time:e}s", file=file)
         print(f" Aesara validate time: {self.validate_time:e}s", file=file)
         print(
             (
@@ -823,9 +823,8 @@ class ProfileStats:
             print(f" Node {node} time {t:e}s", file=file)
         print("", file=file)

-        # The validation time is a subset of optimizer_time
-        if self.optimizer_time > 0:
-            assert self.validate_time < self.optimizer_time
+        if self.rewriting_time > 0:
+            assert self.validate_time < self.rewriting_time
     def summary_globals(self, file):
         print(
@@ -1468,10 +1467,10 @@ class ProfileStats:
             aesara.printing.debugprint(fcts, print_type=True)
         if self.variable_shape or self.variable_strides:
             self.summary_memory(file, n_apply_to_print)
-        if self.optimizer_profile:
-            print("Optimizer Profile", file=file)
-            print("-----------------", file=file)
-            self.optimizer_profile[0].print_profile(file, self.optimizer_profile[1])
+        if self.rewriter_profile:
+            print("Rewriter Profile", file=file)
+            print("----------------", file=file)
+            self.rewriter_profile[0].print_profile(file, self.rewriter_profile[1])
         self.print_extra(file)
         self.print_tips(file)
@@ -1619,7 +1618,7 @@ class ProfileStats:
         ):
             print(
                 (
-                    " - You have a dot operation that was not optimized to"
+                    " - You have a dot operation that was not rewritten to"
                     " dot22 (which is faster). Make sure the inputs are "
                     "float32 or float64, and are the same for both inputs. "
                     f"Currently they are: {[i.type for i in node.inputs]}"
...
@@ -603,13 +603,13 @@ class ReplaceValidate(History, Validator):
                 fgraph.revert(chk)
                 if verbose:
                     print(
-                        f"optimizer: validate failed on node {r}.\n Reason: {reason}, {e}"
+                        f"rewriting: validate failed on node {r}.\n Reason: {reason}, {e}"
                     )
                 raise

         if verbose:
             print(
-                f"optimizer: rewrite {reason} replaces {r} of {r.owner} with {new_r} of {new_r.owner}"
+                f"rewriting: rewrite {reason} replaces {r} of {r.owner} with {new_r} of {new_r.owner}"
             )
         # The return is needed by replace_all_validate_remove
...
@@ -481,7 +481,7 @@ class FunctionGraph(MetaObject):
         verbose = config.optimizer_verbose
         if verbose:
             print(
-                f"optimizer: rewrite {reason} replaces {var} of {var.owner} with {new_var} of {new_var.owner}"
+                f"rewriting: rewrite {reason} replaces {var} of {var.owner} with {new_var} of {new_var.owner}"
             )

         new_var = var.type.filter_variable(new_var, allow_convert=True)
...
@@ -177,10 +177,10 @@ class InplaceElemwiseOptimizer(GraphRewriter):
         fgraph.attach_feature(DestroyHandler())

-    @staticmethod
-    def print_profile(stream, prof, level=0):
+    @classmethod
+    def print_profile(cls, stream, prof, level=0):
         blanc = " " * level
-        print(blanc, "InplaceElemwiseOptimizer ", prof["opt"].op, file=stream)
+        print(blanc, cls.__name__, prof["opt"].op, file=stream)
         for k in [
             "node_before",
             "nb_call_replace",
@@ -3164,10 +3164,10 @@ class FusionOptimizer(GraphRewriter):
             time_toposort,
         )

-    @staticmethod
-    def print_profile(stream, prof, level=0):
+    @classmethod
+    def print_profile(cls, stream, prof, level=0):
         blanc = " " * level
-        print(blanc, "FusionOptimizer", file=stream)
+        print(blanc, cls.__name__, file=stream)
         print(blanc, " nb_iter", prof[1], file=stream)
         print(blanc, " nb_replacement", prof[2], file=stream)
         print(blanc, " nb_inconsistency_replace", prof[3], file=stream)
...
@@ -1618,10 +1618,10 @@ class GemmOptimizer(GraphRewriter):
             callbacks_time,
         )

-    @staticmethod
-    def print_profile(stream, prof, level=0):
+    @classmethod
+    def print_profile(cls, stream, prof, level=0):
         blanc = " " * level
-        print(blanc, "GemmOptimizer", file=stream)
+        print(blanc, cls.__name__, file=stream)
         print(blanc, " nb_iter", prof[1], file=stream)
         print(blanc, " nb_replacement", prof[2], file=stream)
         print(blanc, " nb_replacement_didn_t_remove", prof[3], file=stream)
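The switch from `@staticmethod` to `@classmethod` in these `print_profile` methods lets the profile header report the actual (sub)class name via `cls.__name__` instead of a hardcoded string. A minimal sketch with made-up class names (not Aesara's real rewriters):

```python
import io

class GraphRewriterBase:
    @classmethod
    def print_profile(cls, stream, prof, level=0):
        blanc = " " * level
        # cls.__name__ stays correct for subclasses, unlike a
        # hardcoded string such as "FusionOptimizer".
        print(blanc, cls.__name__, file=stream)
        print(blanc, " nb_iter", prof[0], file=stream)

class FusionLikeRewriter(GraphRewriterBase):
    pass

stream = io.StringIO()
FusionLikeRewriter.print_profile(stream, [3])
assert "FusionLikeRewriter" in stream.getvalue()
```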
...
@@ -2763,13 +2763,9 @@ class AbstractConv3d(AbstractConv):
 class AbstractConv_gradWeights(BaseAbstractConv):
-    """Gradient wrt. filters for `AbstractConv`.
-
-    Refer to :func:`BaseAbstractConv <aesara.tensor.nnet.abstract_conv.BaseAbstractConv>`
-    for a more detailed documentation.
+    """Gradient with respect to filters for `AbstractConv`.

-    :note: You will not want to use this directly, but rely on
-           Aesara's automatic differentiation or graph optimization to
-           use it as needed.
+    Refer to :class:`BaseAbstractConv` for more detailed documentation.

     """
@@ -2991,13 +2987,9 @@ class AbstractConv_gradWeights(BaseAbstractConv):
 class AbstractConv2d_gradWeights(AbstractConv_gradWeights):
-    """Gradient wrt. filters for `AbstractConv2d`.
-
-    Refer to :func:`BaseAbstractConv <aesara.tensor.nnet.abstract_conv.BaseAbstractConv>`
-    for a more detailed documentation.
+    """Gradient with respect to filters for `AbstractConv2d`.

-    :note: You will not want to use this directly, but rely on
-           Aesara's automatic differentiation or graph optimization to
-           use it as needed.
+    Refer to :class:`BaseAbstractConv` for more detailed documentation.

     """
@@ -3058,13 +3050,9 @@ class AbstractConv2d_gradWeights(AbstractConv_gradWeights):
 class AbstractConv3d_gradWeights(AbstractConv_gradWeights):
-    """Gradient wrt. filters for `AbstractConv3d`.
-
-    Refer to :func:`BaseAbstractConv <aesara.tensor.nnet.abstract_conv.BaseAbstractConv>`
-    for a more detailed documentation.
+    """Gradient with respect to filters for `AbstractConv3d`.

-    :note: You will not want to use this directly, but rely on
-           Aesara's automatic differentiation or graph optimization to
-           use it as needed.
+    Refer to :class:`BaseAbstractConv` for more detailed documentation.

     """
@@ -3121,13 +3109,9 @@ class AbstractConv3d_gradWeights(AbstractConv_gradWeights):
 class AbstractConv_gradInputs(BaseAbstractConv):
-    """Gradient wrt. inputs for `AbstractConv`.
-
-    Refer to :func:`BaseAbstractConv <aesara.tensor.nnet.abstract_conv.BaseAbstractConv>`
-    for a more detailed documentation.
+    """Gradient with respect to inputs for `AbstractConv`.

-    :note: You will not want to use this directly, but rely on
-           Aesara's automatic differentiation or graph optimization to
-           use it as needed.
+    Refer to :class:`BaseAbstractConv` for more detailed documentation.

     """
@@ -3373,13 +3357,9 @@ class AbstractConv_gradInputs(BaseAbstractConv):
 class AbstractConv2d_gradInputs(AbstractConv_gradInputs):
-    """Gradient wrt. inputs for `AbstractConv2d`.
-
-    Refer to :func:`BaseAbstractConv <aesara.tensor.nnet.abstract_conv.BaseAbstractConv>`
-    for a more detailed documentation.
+    """Gradient with respect to inputs for `AbstractConv2d`.

-    :note: You will not want to use this directly, but rely on
-           Aesara's automatic differentiation or graph optimization to
-           use it as needed.
+    Refer to :class:`BaseAbstractConv` for more detailed documentation.

     """
@@ -3440,13 +3420,9 @@ class AbstractConv2d_gradInputs(AbstractConv_gradInputs):
 class AbstractConv3d_gradInputs(AbstractConv_gradInputs):
-    """Gradient wrt. inputs for `AbstractConv3d`.
-
-    Refer to :func:`BaseAbstractConv <aesara.tensor.nnet.abstract_conv.BaseAbstractConv>`
-    for a more detailed documentation.
+    """Gradient with respect to inputs for `AbstractConv3d`.

-    :note: You will not want to use this directly, but rely on
-           Aesara's automatic differentiation or graph optimization to
-           use it as needed.
+    Refer to :class:`BaseAbstractConv` for more detailed documentation.

     """
...
@@ -1388,21 +1388,20 @@ def local_setsubtensor_of_constants(fgraph, node):
 @register_specialize
 @node_rewriter([AdvancedSubtensor1])
 def local_adv_sub1_adv_inc_sub1(fgraph, node):
-    """Optimize the possible AdvSub1(AdvSetSub1(...), ...).
+    """Rewrite graphs like ``AdvancedSubtensor1(AdvancedSetSubtensor1(...), ...)``.

     AdvancedSubtensor1(AdvancedSetSubtensor1(x, y, idx), idx) -> y

     Notes
     -----
-    This opt add AssertOp. Otherwise, it would remove shape and
-    index error. If you want to get rid of them, see the
-    :ref:`unsafe_optimization` section.
+    This rewrite adds an `AssertOp`; otherwise, it would remove shape and index
+    error. If you want to get rid of them, see the :ref:`unsafe_rewrites`
+    section.

-    A previous version of this optimization also matched
-    AdvancedSubtensor1(AdvancedIncSubtensor1(0s, y, idx), idx) -> y
+    WARNING: A previous version of this rewrite also matched
+    ``AdvancedSubtensor1(AdvancedIncSubtensor1(x, y, idx), idx)``.
     This is incorrect when there are duplicate indices.
-    The current version warns the user about potential past issues.
+    The current version warns the user about potential issues.

     """
     if not isinstance(node.op, AdvancedSubtensor1):
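Why the removed `Inc` form of this rewrite is unsound with duplicate indices can be shown with a pure-Python stand-in for `AdvancedIncSubtensor1` (a sketch, not Aesara code): increments at a duplicated index accumulate, so indexing the result does not return `y`.

```python
def inc_subtensor1(x, y, idx):
    # Like AdvancedIncSubtensor1 on a zero base: out[idx[i]] += y[i]
    out = list(x)
    for i, j in enumerate(idx):
        out[j] += y[i]
    return out

zeros = [0, 0, 0]
y = [5, 7]
idx = [1, 1]  # duplicate indices

out = inc_subtensor1(zeros, y, idx)
# Increments accumulate at the duplicated position:
assert out == [0, 12, 0]
# Reading back with the same indices does NOT recover y,
# so rewriting AdvSub1(AdvIncSub1(0s, y, idx), idx) -> y is wrong here:
assert [out[j] for j in idx] == [12, 12]
assert [out[j] for j in idx] != y
```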
...
@@ -5,26 +5,26 @@
 Extending Aesara: FAQ and Troubleshooting
 =========================================

-I wrote a new Op/Type, and weird stuff is happening...
-------------------------------------------------------
+I wrote a new `Op`\/`Type`, and weird stuff is happening...
+-----------------------------------------------------------

 First, check the :ref:`op_contract` and the :ref:`type_contract`
 and make sure you're following the rules.

-Then try running your program in :ref:`using_debugmode`. DebugMode might catch
+Then try running your program in :ref:`using_debugmode`. `DebugMode` might catch
 something that you're not seeing.

-I wrote a new optimization, but it's not getting used...
----------------------------------------------------------
+I wrote a new rewrite, but it's not getting used...
+---------------------------------------------------

-Remember that you have to register optimizations with the :ref:`optdb`
+Remember that you have to register rewrites with the :ref:`optdb`
 for them to get used by the normal modes like FAST_COMPILE, FAST_RUN,
-and DebugMode.
+and `DebugMode`.

-I wrote a new optimization, and it changed my results even though I'm pretty sure it is correct.
-------------------------------------------------------------------------------------------------
+I wrote a new rewrite, and it changed my results even though I'm pretty sure it is correct.
+-------------------------------------------------------------------------------------------

 First, check the :ref:`op_contract` and make sure you're following the rules.

-Then try running your program in :ref:`using_debugmode`. DebugMode might
+Then try running your program in :ref:`using_debugmode`. `DebugMode` might
 catch something that you're not seeing.
@@ -205,7 +205,7 @@ structures, code going like ``def f(x): ...`` would produce an :class:`Op` for
 A :class:`Type` in Aesara provides static information (or constraints) about
 data objects in a graph. The information provided by :class:`Type`\s allows
-Aesara to perform optimizations and produce more efficient compiled code.
+Aesara to perform rewrites and produce more efficient compiled code.

 Every symbolic :class:`Variable` in an Aesara graph has an associated
 :class:`Type` instance, and :class:`Type`\s also serve as a means of
@@ -306,7 +306,7 @@ When used in a computation graph as the input of an
 will *always* take the value contained in the :class:`Constant`'s data
 field. Furthermore, it is assumed that the :class:`Op` will not under
 any circumstances modify the input. This means that a :class:`Constant` is
-eligible to participate in numerous optimizations: constant in-lining
+eligible to participate in numerous rewrites: constant in-lining
 in C code, constant folding, etc.
Automatic Differentiation Automatic Differentiation
...@@ -327,26 +327,26 @@ gradient of the graph's output with respect to the graph's inputs. ...@@ -327,26 +327,26 @@ gradient of the graph's output with respect to the graph's inputs.
A following section of this tutorial will examine the topic of A following section of this tutorial will examine the topic of
:ref:`differentiation<tutcomputinggrads>` in greater detail. :ref:`differentiation<tutcomputinggrads>` in greater detail.
Optimizations Rewrites
============= ========
When compiling an Aesara graph using :func:`aesara.function`, a graph is When compiling an Aesara graph using :func:`aesara.function`, a graph is
necessarily provided. While this graph structure shows how to compute the necessarily provided. While this graph structure shows how to compute the
output from the input, it also offers the possibility to improve the way this output from the input, it also offers the possibility to improve the way this
computation is carried out. The way optimizations work in Aesara is by computation is carried out. The way rewrites work in Aesara is by
identifying and replacing certain patterns in the graph with other specialized identifying and replacing certain patterns in the graph with other specialized
patterns that produce the same results but are either faster or more patterns that produce the same results but are either faster or more
stable. Optimizations can also detect identical subgraphs and ensure that the stable. Rewrites can also detect identical subgraphs and ensure that the
same values are not computed twice. same values are not computed twice.
For example, one (simple) optimization that Aesara uses is to replace For example, one simple rewrite that Aesara uses is to replace
the pattern :math:`\frac{xy}{y}` by :math:`x`. the pattern :math:`\frac{xy}{y}` by :math:`x`.
See :ref:`graph_rewriting` and :ref:`optimizations` for more information. See :ref:`graph_rewriting` and :ref:`optimizations` for more information.
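The pattern-matching idea behind such a rewrite can be sketched with a toy expression tree in plain Python. This is only an illustration of the concept; Aesara's actual rewriter works on :class:`Apply` graphs, not tuples.

```python
# Toy expression trees: tuples ("op", left, right) or variable names.
def rewrite_div_cancel(expr):
    """Replace (x * y) / y with x, the example pattern above (a sketch)."""
    if (isinstance(expr, tuple) and expr[0] == "div"
            and isinstance(expr[1], tuple) and expr[1][0] == "mul"
            and expr[1][2] == expr[2]):
        return expr[1][1]
    return expr  # no match: leave the expression unchanged

assert rewrite_div_cancel(("div", ("mul", "x", "y"), "y")) == "x"
assert rewrite_div_cancel(("div", "x", "y")) == ("div", "x", "y")
```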
**Example** **Example**
Consider the following example of optimization: Consider the following example of a rewrite:
>>> import aesara >>> import aesara
>>> a = aesara.tensor.vector("a") # declare symbolic variable >>> a = aesara.tensor.vector("a") # declare symbolic variable
...@@ -354,13 +354,13 @@ Consider the following example of optimization: ...@@ -354,13 +354,13 @@ Consider the following example of optimization:
>>> f = aesara.function([a], b) # compile function >>> f = aesara.function([a], b) # compile function
>>> print(f([0, 1, 2])) # prints `array([0,2,1026])` >>> print(f([0, 1, 2])) # prints `array([0,2,1026])`
[ 0. 2. 1026.] [ 0. 2. 1026.]
>>> aesara.printing.pydotprint(b, outfile="./pics/symbolic_graph_unopt.png", var_with_name_simple=True) # doctest: +SKIP >>> aesara.printing.pydotprint(b, outfile="./pics/symbolic_graph_no_rewrite.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at ./pics/symbolic_graph_unopt.png The output file is available at ./pics/symbolic_graph_no_rewrite.png
>>> aesara.printing.pydotprint(f, outfile="./pics/symbolic_graph_opt.png", var_with_name_simple=True) # doctest: +SKIP >>> aesara.printing.pydotprint(f, outfile="./pics/symbolic_graph_rewrite.png", var_with_name_simple=True) # doctest: +SKIP
The output file is available at ./pics/symbolic_graph_opt.png The output file is available at ./pics/symbolic_graph_rewrite.png
We used :func:`aesara.printing.pydotprint` to visualize the optimized graph We used :func:`aesara.printing.pydotprint` to visualize the rewritten graph
(right), which is much more compact than the unoptimized graph (left). (right), which is much more compact than the un-rewritten graph (left).
.. |g1| image:: ./pics/symbolic_graph_unopt.png .. |g1| image:: ./pics/symbolic_graph_unopt.png
:width: 500 px :width: 500 px
...@@ -368,7 +368,7 @@ We used :func:`aesara.printing.pydotprint` to visualize the optimized graph ...@@ -368,7 +368,7 @@ We used :func:`aesara.printing.pydotprint` to visualize the optimized graph
:width: 500 px :width: 500 px
================================ ====================== ================================ ================================ ====================== ================================
Unoptimized graph Optimized graph Un-rewritten graph Rewritten graph
================================ ====================== ================================ ================================ ====================== ================================
|g1| |g2| |g1| |g2|
================================ ====================== ================================ ================================ ====================== ================================
...@@ -6,14 +6,14 @@ Extending Aesara ...@@ -6,14 +6,14 @@ Extending Aesara
================ ================
This advanced tutorial is for users who want to extend Aesara with new :class:`Type`\s, This advanced tutorial is for users who want to extend Aesara with new :class:`Type`\s,
new Operations (:Class:`Op`\S), and new graph optimizations. This first page of the new operations (i.e. :class:`Op`\s), and new graph rewrites. This first page of the
tutorial mainly focuses on the Python implementation of an :Class:`Op` and then tutorial mainly focuses on the Python implementation of an :class:`Op` and then
proposes an overview of the most important methods that define an :class:`Op`. proposes an overview of the most important methods that define an :class:`Op`.
The second page of the tutorial (:ref:`creating_a_c_op`) provides then The second page of the tutorial (:ref:`creating_a_c_op`) provides then
information on the C implementation of an :Class:`Op`. The rest of the tutorial information on the C implementation of an :class:`Op`. The rest of the tutorial
goes more in depth on advanced topics related to :Class:`Op`\s, such as how to write goes more in depth on advanced topics related to :class:`Op`\s, such as how to write
efficient code for an :Class:`Op` and how to write an optimization to speed up the efficient code for an :class:`Op` and how to write a rewrite to speed up the
execution of an :Class:`Op`. execution of an :class:`Op`.
Along the way, this tutorial also introduces many aspects of how Aesara works, Along the way, this tutorial also introduces many aspects of how Aesara works,
so it is also good for you if you are interested in getting more under the hood so it is also good for you if you are interested in getting more under the hood
...@@ -23,11 +23,11 @@ with Aesara itself. ...@@ -23,11 +23,11 @@ with Aesara itself.
Before tackling this more advanced presentation, it is highly recommended Before tackling this more advanced presentation, it is highly recommended
to read the introductory :ref:`Tutorial<tutorial>`, especially the sections to read the introductory :ref:`Tutorial<tutorial>`, especially the sections
that introduce the Aesara Graphs, as providing a novel Aesara :class:`Op` requires a that introduce the Aesara graphs, as providing a novel Aesara :class:`Op` requires a
basic understanding of the Aesara Graphs. basic understanding of the Aesara graphs.
See also the :ref:`dev_start_guide` for information regarding the See also the :ref:`dev_start_guide` for information regarding the
versioning framework, namely about *git* and *GitHub*, regarding the versioning framework, namely about Git and GitHub, regarding the
development workflow and how to make a quality contribution. development workflow and how to make a quality contribution.
.. toctree:: .. toctree::
......
...@@ -5,11 +5,11 @@ ...@@ -5,11 +5,11 @@
Views and inplace operations Views and inplace operations
============================ ============================
Aesara allows the definition of ``Op``\s which return a :term:`view` on one Aesara allows the definition of :class:`Op`\s which return a :term:`view` on one
of their inputs or operate :term:`inplace` on one or several of their inputs or operate :term:`inplace` on one or several
inputs. This allows more efficient operations on NumPy's ``ndarray`` inputs. This allows more efficient operations on NumPy's :class:`ndarray`
data type than would be possible otherwise. data type than would be possible otherwise.
However, in order to work correctly, these ``Op``\s need to However, in order to work correctly, these :class:`Op`\s need to
implement an additional interface. implement an additional interface.
Aesara recognizes views and inplace operations specially. It ensures Aesara recognizes views and inplace operations specially. It ensures
...@@ -23,7 +23,7 @@ Views ...@@ -23,7 +23,7 @@ Views
A "view" on an object ``x`` is an object ``y`` which shares memory A "view" on an object ``x`` is an object ``y`` which shares memory
with ``x`` in some way. In other words, changing ``x`` might also with ``x`` in some way. In other words, changing ``x`` might also
change ``y`` and vice versa. For example, imagine a ``vector`` structure change ``y`` and vice versa. For example, imagine a `vector` structure
which contains two fields: an integer length and a pointer to a memory which contains two fields: an integer length and a pointer to a memory
buffer. Suppose we have: buffer. Suppose we have:
...@@ -44,9 +44,9 @@ range ``0xDEADBEFF - 0xDEADBFDF`` and z the range ``0xCAFEBABE - ...@@ -44,9 +44,9 @@ range ``0xDEADBEFF - 0xDEADBFDF`` and z the range ``0xCAFEBABE -
0xCAFEBBBE``. Since the ranges for ``x`` and ``y`` overlap, ``y`` is 0xCAFEBBBE``. Since the ranges for ``x`` and ``y`` overlap, ``y`` is
considered to be a view of ``x`` and vice versa. considered to be a view of ``x`` and vice versa.
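The byte-range overlap described above can be reproduced with Python's own buffer types; a minimal illustration using :class:`memoryview` (plain Python, not Aesara code):

```python
x = bytearray(b"\x00\x01\x02\x03\x04\x05\x06\x07")  # x owns an 8-byte buffer
y = memoryview(x)[2:6]  # y is a view: it shares a sub-range of x's buffer

y[0] = 99          # writing through the view...
assert x[2] == 99  # ...changes x as well: x and y overlap in memory
```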
Suppose you had an ``Op`` which took ``x`` as input and returned Suppose you had an :class:`Op` which took ``x`` as input and returned
``y``. You would need to tell Aesara that ``y`` is a view of ``x``. For this ``y``. You would need to tell Aesara that ``y`` is a view of ``x``. For this
purpose, you would set the ``view_map`` field as follows: purpose, you would set the :attr:`Op.view_map` field as follows:
.. testsetup:: .. testsetup::
...@@ -88,15 +88,15 @@ Inplace operations ...@@ -88,15 +88,15 @@ Inplace operations
An inplace operation is one that modifies one or more of its An inplace operation is one that modifies one or more of its
inputs. For example, the expression ``x += y`` where ``x`` and ``y`` inputs. For example, the expression ``x += y`` where ``x`` and ``y``
are ``numpy.ndarray`` instances would normally represent an inplace are :class:`numpy.ndarray` instances would normally represent an inplace
operation on ``x``. operation on ``x``.
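NumPy makes the same distinction explicit: :func:`numpy.add` with ``out=`` modifies its input inplace and also returns it, which mirrors the functional convention Aesara uses (a NumPy illustration, not Aesara code):

```python
import numpy as np

x = np.array([1.0, 2.0, 3.0])
y = np.array([10.0, 10.0, 10.0])

# `out=x` makes the addition inplace; the modified input is also returned,
# so callers can keep using the returned value in functional style.
x2 = np.add(x, y, out=x)

assert x2 is x                           # same buffer: the add was inplace
assert x.tolist() == [11.0, 12.0, 13.0]
```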
.. note:: .. note::
Inplace operations in Aesara still work in a functional setting: Inplace operations in Aesara still work in a functional setting:
they need to return the modified input. Symbolically, Aesara they need to return the modified input. Symbolically, Aesara
requires one Variable standing for the input *before* being modified requires one :class:`Variable` standing for the input before being modified
and *another* Variable representing the input *after* being and another :class:`Variable` representing the input after being
modified. Therefore, code using inplace operations would look like modified. Therefore, code using inplace operations would look like
this: this:
...@@ -121,29 +121,29 @@ operation on ``x``. ...@@ -121,29 +121,29 @@ operation on ``x``.
Needless to say, this goes for user-defined inplace operations as Needless to say, this goes for user-defined inplace operations as
well; the modified input must figure in the list of outputs you well; the modified input must figure in the list of outputs you
give to ``Apply`` in the definition of ``make_node``. give to :class:`Apply` in the definition of :meth:`Apply.make_node`.
Also, for technical reasons but also because they are slightly Also, for technical reasons but also because they are slightly
confusing to use as evidenced by the previous code, Aesara does not confusing to use as evidenced by the previous code, Aesara does not
allow the end user to use inplace operations by default. However, allow the end user to use inplace operations by default. However,
it does allow *optimizations* to substitute them in in a later it does allow rewrites to substitute them in during a later
phase. Therefore, typically, if you define an inplace operation, phase. Therefore, typically, if you define an inplace operation,
you will define a pure equivalent and an optimization which you will define a pure equivalent and a rewrite which
substitutes one for the other. Aesara will automatically verify if substitutes one for the other. Aesara will automatically verify if
it is possible to do so and will refuse the substitution if it it is possible to do so and will refuse the substitution if it
introduces inconsistencies. introduces inconsistencies.
Take the previous definitions of ``x``, ``y`` and ``z`` and suppose an ``Op`` which Take the previous definitions of ``x``, ``y`` and ``z`` and suppose an :class:`Op` which
adds one to every byte of its input. If we give ``x`` as an input to adds one to every byte of its input. If we give ``x`` as an input to
that ``Op``, it can either allocate a new buffer of the same size as ``x`` that :class:`Op`, it can either allocate a new buffer of the same size as ``x``
(that could be ``z``) and set that new buffer's bytes to the variable of (that could be ``z``) and set that new buffer's bytes to the result of
the addition. That would be a normal, :term:`pure` ``Op``. Alternatively, the addition. That would be a normal, :term:`pure` :class:`Op`. Alternatively,
it could add one to each byte *in* the buffer ``x``, therefore it could add one to each byte in the buffer ``x``, therefore
changing it. That would be an inplace ``Op``. changing it. That would be an inplace :class:`Op`.
Aesara needs to be notified of this fact. The syntax is similar to Aesara needs to be notified of this fact. The syntax is similar to
that of ``view_map``: that of :attr:`Op.view_map`:
.. testcode:: .. testcode::
...@@ -171,27 +171,27 @@ first input (position 0). ...@@ -171,27 +171,27 @@ first input (position 0).
# unlike for views, the previous line is legal and supported # unlike for views, the previous line is legal and supported
.. note:: .. note::
``DestroyHandler`` provides a hackish means of specifying that a variable cannot be :class:`DestroyHandler` provides a hackish means of specifying that a variable cannot be
"destroyed" by an in-place operation: ``var.tag.indestructible = True``. "destroyed" by an in-place operation: ``var.tag.indestructible = True``.
Destructive Operations Destructive Operations
====================== ======================
While some operations will operate inplace on their inputs, some might While some operations will operate inplace on their inputs, some might
simply destroy or corrupt them. For example, an ``Op`` could do temporary simply destroy or corrupt them. For example, an :class:`Op` could do temporary
calculations right in its inputs. If that is the case, Aesara also calculations right in its inputs. If that is the case, Aesara also
needs to be notified. The way to notify Aesara is to assume that some needs to be notified. The way to notify Aesara is to assume that some
output operated inplace on whatever inputs are changed or corrupted by output operated inplace on whatever inputs are changed or corrupted by
the ``Op`` (even if the output does not technically reuse any of the the :class:`Op` (even if the output does not technically reuse any of the
input(s)'s memory). From there, go to the previous section. input(s)'s memory). From there, go to the previous section.
.. warning:: .. warning::
Failure to correctly mark down views and inplace operations using Failure to correctly mark down views and inplace operations using
``view_map`` and ``destroy_map`` can lead to nasty bugs. In the :attr:`Op.view_map` and :attr:`Op.destroy_map` can lead to nasty bugs. In the
absence of this information, Aesara might assume that it is safe to absence of this information, Aesara might assume that it is safe to
execute an inplace operation on some inputs *before* doing other execute an inplace operation on some inputs before doing other
calculations on the *previous* values of the inputs. For example, calculations on the previous values of the inputs. For example,
in the code: ``y = log(x); x2 = add_inplace(x, z)`` it is in the code: ``y = log(x); x2 = add_inplace(x, z)`` it is
imperative to do the logarithm before the addition (because after imperative to do the logarithm before the addition (because after
the addition, the original x that we wanted to take the logarithm the addition, the original x that we wanted to take the logarithm
...@@ -199,25 +199,28 @@ input(s)'s memory). From there, go to the previous section. ...@@ -199,25 +199,28 @@ input(s)'s memory). From there, go to the previous section.
the value of ``x`` it might invert the order and that will the value of ``x`` it might invert the order and that will
certainly lead to erroneous computations. certainly lead to erroneous computations.
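The ordering hazard in this warning is easy to reproduce with NumPy (an illustration of the failure mode, not Aesara code):

```python
import numpy as np

x = np.array([1.0, np.e])
z = np.array([1.0, 1.0])

# Correct order: read x (the logarithm) before the inplace add destroys it.
y = np.log(x)
x2 = np.add(x, z, out=x)
assert np.allclose(y, [0.0, 1.0])

# Inverted order: the inplace add runs first, so the logarithm sees the
# updated buffer and produces different (wrong) values.
x = np.array([1.0, np.e])
x2 = np.add(x, z, out=x)
y_wrong = np.log(x)
assert not np.allclose(y_wrong, [0.0, 1.0])
```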
You can often identify an incorrect ``view_map`` or ``destroy_map`` You can often identify an incorrect :attr:`Op.view_map` or :attr:`Op.destroy_map`
by using :ref:`DebugMode`. *Be sure to use ``DebugMode`` when developing by using :ref:`DebugMode`.
a new ``Op`` that uses ``view_map`` and/or ``destroy_map``.*
Inplace optimization and DebugMode .. note::
================================== Consider using :class:`DebugMode` when developing
a new :class:`Op` that uses :attr:`Op.view_map` and/or :attr:`Op.destroy_map`.
Inplace Rewriting and `DebugMode`
=================================
It is recommended that during the graph construction, all ``Op``\s are not inplace. It is recommended that during the graph construction, all :class:`Op`\s are not inplace.
Then an optimization replaces them with inplace ones. Currently ``DebugMode`` checks Then a rewrite replaces them with inplace ones. Currently :class:`DebugMode` checks
all optimizations that were tried even if they got rejected. One reason an inplace all rewrites that were tried even if they got rejected. One reason an inplace
optimization can get rejected is when there is another ``Op`` that is already being applied rewrite can get rejected is when there is another :class:`Op` that is already being applied
inplace on the same input. Another reason to reject an inplace optimization is inplace on the same input. Another reason to reject an inplace rewrite is
if it would introduce a cycle into the graph. if it would introduce a cycle into the graph.
The problem with ``DebugMode`` is that it will trigger a useless error when The problem with :class:`DebugMode` is that it will trigger a useless error when
checking a rejected inplace optimization, since it will lead to wrong results. checking a rejected inplace rewrite, since it will lead to wrong results.
In order to be able to use ``DebugMode`` in more situations, your inplace In order to be able to use :class:`DebugMode` in more situations, your inplace
optimization can pre-check whether it will get rejected by using the rewrite can pre-check whether it will get rejected by using the
``aesara.graph.destroyhandler.fast_inplace_check()`` function, that will tell :func:`aesara.graph.destroyhandler.fast_inplace_check` function, which will tell
which ``Op``\s can be performed inplace. You may then skip the optimization if it is which :class:`Op`\s can be performed inplace. You may then skip the rewrite if it is
incompatible with this check. Note however that this check does not cover all incompatible with this check. Note, however, that this check does not cover all
cases where an optimization may be rejected (it will not detect cycles). cases where a rewrite may be rejected (it will not detect cycles).
...@@ -77,12 +77,12 @@ It has to define the following methods. ...@@ -77,12 +77,12 @@ It has to define the following methods.
``other`` is also an :class:`Op`. ``other`` is also an :class:`Op`.
Returning ``True`` here is a promise to the optimization system Returning ``True`` here is a promise to the rewrite system
that the other :class:`Op` will produce exactly the same graph effects that the other :class:`Op` will produce exactly the same graph effects
(from perform) as this one, given identical inputs. This means it (e.g. from its :meth:`Op.perform`) as this one, given identical inputs. This means it
will produce the same output values, it will destroy the same will produce the same output values, it will destroy the same
inputs (same ``destroy_map``), and will alias outputs to the same inputs (same :attr:`Op.destroy_map`), and will alias outputs to the same
inputs (same ``view_map``). For more details, see inputs (same :attr:`Op.view_map`). For more details, see
:ref:`views_and_inplace`. :ref:`views_and_inplace`.
.. note:: .. note::
...@@ -99,9 +99,9 @@ It has to define the following methods. ...@@ -99,9 +99,9 @@ It has to define the following methods.
lifetime of self. :class:`Op` instances should be immutable in this lifetime of self. :class:`Op` instances should be immutable in this
sense. sense.
.. note:: .. note::
If you set `__props__`, this will be automatically generated. If you set :attr:`Op.__props__`, this will be automatically generated.
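A rough pure-Python sketch of the kind of equality that :attr:`Op.__props__` generates (a simplified illustration; Aesara's actual implementation differs):

```python
class PropsEqMixin:
    """Equality and hash derived from a ``__props__`` tuple, in the spirit
    of Aesara's ``Op.__props__`` (a simplified sketch, not the real code)."""
    __props__ = ()

    def _props(self):
        return tuple(getattr(self, name) for name in self.__props__)

    def __eq__(self, other):
        return type(self) is type(other) and self._props() == other._props()

    def __hash__(self):
        return hash((type(self), self._props()))


class ScalarShift(PropsEqMixin):
    __props__ = ("offset",)  # the only attribute influencing computation

    def __init__(self, offset):
        self.offset = offset


assert ScalarShift(1) == ScalarShift(1)  # same props: interchangeable Ops
assert ScalarShift(1) != ScalarShift(2)  # different props: distinct Ops
```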
.. op_optional: .. op_optional:
...@@ -110,7 +110,7 @@ Optional methods or attributes ...@@ -110,7 +110,7 @@ Optional methods or attributes
.. attribute:: __props__ .. attribute:: __props__
*Default:* Undefined Default: Undefined
Must be a tuple. Lists the name of the attributes which influence Must be a tuple. Lists the name of the attributes which influence
the computation performed. This will also enable the automatic the computation performed. This will also enable the automatic
...@@ -122,7 +122,7 @@ Optional methods or attributes ...@@ -122,7 +122,7 @@ Optional methods or attributes
.. attribute:: default_output .. attribute:: default_output
*Default:* None Default: ``None``
If this member variable is an integer, then the default If this member variable is an integer, then the default
implementation of ``__call__`` will return implementation of ``__call__`` will return
...@@ -177,7 +177,7 @@ Optional methods or attributes ...@@ -177,7 +177,7 @@ Optional methods or attributes
.. function:: infer_shape(fgraph, node, shapes) .. function:: infer_shape(fgraph, node, shapes)
This function is needed for shape optimization. ``shapes`` is a This function is needed for shape rewrites. ``shapes`` is a
list with one tuple for each input of the :class:`Apply` node (which corresponds list with one tuple for each input of the :class:`Apply` node (which corresponds
to the inputs of the :class:`Op`). Each tuple contains as many elements as the to the inputs of the :class:`Op`). Each tuple contains as many elements as the
number of dimensions of the corresponding input. The value of each element number of dimensions of the corresponding input. The value of each element
...@@ -216,9 +216,9 @@ Optional methods or attributes ...@@ -216,9 +216,9 @@ Optional methods or attributes
.. function:: do_constant_folding(fgraph, node) .. function:: do_constant_folding(fgraph, node)
*Default:* Return True Default: Return ``True``
By default when optimizations are enabled, we remove during By default, when rewrites are enabled, we remove, during
function compilation :class:`Apply` nodes whose inputs are all constants. function compilation, :class:`Apply` nodes whose inputs are all constants.
We replace the :class:`Apply` node with an Aesara constant variable. We replace the :class:`Apply` node with an Aesara constant variable.
This way, the :class:`Apply` node is not executed at each function This way, the :class:`Apply` node is not executed at each function
......
...@@ -35,21 +35,20 @@ Some relevant :ref:`Features <libdoc_graph_fgraphfeature>` are typically added t ...@@ -35,21 +35,20 @@ Some relevant :ref:`Features <libdoc_graph_fgraphfeature>` are typically added t
rewrites from operating in-place on inputs declared as immutable. rewrites from operating in-place on inputs declared as immutable.
Step 2 - Perform graph optimizations Step 2 - Perform graph rewrites
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Once the :class:`FunctionGraph` is constructed, an :term:`optimizer` is produced by Once the :class:`FunctionGraph` is constructed, a :term:`rewriter` is produced by
the :term:`mode` passed to :func:`function` (the :class:`Mode` basically has two the :term:`mode` passed to :func:`function`. That rewrite is
important fields, :attr:`linker` and :attr:`optimizer`). That optimizer is applied to the :class:`FunctionGraph` using its :meth:`GraphRewriter.rewrite` method.
applied on the :class:`FunctionGraph` using its :meth:`Optimizer.optimize` method.
The optimizer is typically obtained through :attr:`optdb`. The rewriter is typically obtained through a query on :attr:`optdb`.
Step 3 - Execute linker to obtain a thunk Step 3 - Execute linker to obtain a thunk
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Once the computation graph is optimized, the :term:`linker` is Once the computation graph is rewritten, the :term:`linker` is
extracted from the :class:`Mode`. It is then called with the :class:`FunctionGraph` as extracted from the :class:`Mode`. It is then called with the :class:`FunctionGraph` as
argument to produce a ``thunk``, which is a function with no arguments that argument to produce a ``thunk``, which is a function with no arguments that
returns nothing. Along with the thunk, one list of input containers (a returns nothing. Along with the thunk, one list of input containers (a
...@@ -61,9 +60,9 @@ the inputs must be placed in the input containers, the thunk must be ...@@ -61,9 +60,9 @@ the inputs must be placed in the input containers, the thunk must be
called, and the outputs must be retrieved from the output containers called, and the outputs must be retrieved from the output containers
where the thunk put them. where the thunk put them.
Typically, the linker calls the ``toposort`` method in order to obtain Typically, the linker calls the :meth:`FunctionGraph.toposort` method in order to obtain
a linear sequence of operations to perform. How they are linked a linear sequence of operations to perform. How they are linked
together depends on the Linker used. The :class:`CLinker` produces a single together depends on the :class:`Linker` class used. For example, the :class:`CLinker` produces a single
block of C code for the whole computation, whereas the :class:`OpWiseCLinker` block of C code for the whole computation, whereas the :class:`OpWiseCLinker`
produces one thunk for each individual operation and calls them in produces one thunk for each individual operation and calls them in
sequence. sequence.
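The one-thunk-per-operation scheme can be sketched in plain Python, with containers modeled as one-element lists in the spirit of Aesara's storage cells (a toy illustration, not the real linker):

```python
# Toy model of "one thunk per operation" linking (OpWiseCLinker style).
x, y = [2.0], [3.0]          # input containers
tmp, out = [None], [None]    # intermediate and output containers

def thunk_mul():             # tmp = x * y
    tmp[0] = x[0] * y[0]

def thunk_add():             # out = tmp + y
    out[0] = tmp[0] + y[0]

# The "linker" simply calls the thunks in topological order.
for thunk in (thunk_mul, thunk_add):
    thunk()

assert out[0] == 9.0         # (2 * 3) + 3
```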
......
...@@ -36,7 +36,7 @@ The following sections assumes the reader is familiar with the following : ...@@ -36,7 +36,7 @@ The following sections assumes the reader is familiar with the following :
2. The interface and usage of Aesara's :ref:`scan <lib_scan>` function 2. The interface and usage of Aesara's :ref:`scan <lib_scan>` function
Additionally, the :ref:`scan_internals_optimizations` section below assumes Additionally, the :ref:`scan_internals_rewrites` section below assumes
knowledge of: knowledge of:
3. Aesara's :ref:`graph rewriting <graph_rewriting>` 3. Aesara's :ref:`graph rewriting <graph_rewriting>`
...@@ -63,7 +63,7 @@ deal with, are : ...@@ -63,7 +63,7 @@ deal with, are :
* ``views.py`` contains different views of the `Scan` `Op` that have * ``views.py`` contains different views of the `Scan` `Op` that have
simpler and easier signatures to be used in specific cases. simpler and easier signatures to be used in specific cases.
* ``opt.py`` contains the list of all Aesara graph optimizations for the * ``opt.py`` contains the list of all Aesara graph rewrites for the
`Scan` operator. `Scan` operator.
...@@ -155,15 +155,15 @@ Multiply-recurrent multiple outputs (MITMOT) Initial values for ...@@ -155,15 +155,15 @@ Multiply-recurrent multiple outputs (MITMOT) Initial values for
=========================================================== ======================================================= ============================================================ ============================================================= ========================================================= ====================================================== =========================================================== ======================================================= ============================================================ ============================================================= ========================================================= ======================================================
.. _scan_internals_optimizations: .. _scan_internals_rewrites:
Optimizations Rewrites
============= ========
`remove_constants_and_unused_inputs_scan` `remove_constants_and_unused_inputs_scan`
----------------------------------------- -----------------------------------------
This optimization serves two purposes, The first is to remove a `Scan` `Op`'s This rewrite serves two purposes. The first is to remove a `Scan` `Op`'s
unused inputs. The second is to take a `Scan` `Op`'s constant inputs and remove unused inputs. The second is to take a `Scan` `Op`'s constant inputs and remove
them, instead injecting the constants directly into the graph or the `Scan` them, instead injecting the constants directly into the graph or the `Scan`
`Op`'s inner function. This will allow constant folding to happen inside the `Op`'s inner function. This will allow constant folding to happen inside the
...@@ -173,31 +173,31 @@ inner function. ...@@ -173,31 +173,31 @@ inner function.
`PushOutNonSeqScan` `PushOutNonSeqScan`
------------------- -------------------
This optimizations pushes, out of `Scan`'s inner function and into the outer This rewrite pushes sub-graphs that depend only on non-sequence inputs out of
function, computation that depends only on non-sequence inputs. Such `Scan`'s inner function and into the outer function. Such computation ends up
computation ends up being done every iteration on the same values so moving being done every iteration on the same values so moving it to the outer function
it to the outer function to be executed only once, before the `Scan` `Op`, to be executed only once, before the `Scan` `Op`, reduces the amount of
reduces the amount of computation that needs to be performed. computation that needs to be performed.
`PushOutSeqScan` `PushOutSeqScan`
---------------- ----------------
This optimization resembles `PushOutNonSeqScan` but it tries to push, out of This rewrite resembles `PushOutNonSeqScan` but it tries to push, out of
the inner function, the computation that only relies on sequence and the inner function, the computation that only relies on sequence and
non-sequence inputs. The idea behind this optimization is that, when it is non-sequence inputs. The idea behind this rewrite is that, when it is
possible to do so, it is generally more computationally efficient to perform possible to do so, it is generally more computationally efficient to perform
a single operation on a large tensor rather than perform that same operation a single operation on a large tensor rather than perform that same operation
many times on many smaller tensors. In many cases, this optimization can many times on many smaller tensors. In many cases, this rewrite can
increase memory usage but, in some specific cases, it can also decrease it. increase memory usage but, in some specific cases, it can also decrease it.
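The intuition can be illustrated with NumPy: applying an elementwise operation once to the whole stacked sequence gives the same result as applying it at every step inside a loop (a NumPy illustration of the idea, not the rewrite itself):

```python
import numpy as np

seq = np.arange(12.0).reshape(4, 3)  # 4 timesteps of 3-vectors
w = np.array([1.0, 2.0, 3.0])        # a non-sequence input

# Per-step computation, as it would happen inside Scan's inner function:
stepwise = np.stack([s * w for s in seq])

# The same computation pushed out of the loop: one op on the whole sequence.
pushed_out = seq * w

assert np.allclose(stepwise, pushed_out)
```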
`PushOutScanOutput` `PushOutScanOutput`
------------------- -------------------
This optimizations attempts to push out some of the computation at the end This rewrite attempts to push out some of the computation at the end
of the inner function to the outer function, to be executed after the `Scan` of the inner function to the outer function, to be executed after the `Scan`
node. Like `PushOutSeqScan`, this optimization aims to replace many operations node. Like `PushOutSeqScan`, this rewrite aims to replace many operations
on small tensors by few operations on large tensors. It can also lead to on small tensors by few operations on large tensors. It can also lead to
increased memory usage. increased memory usage.
...@@ -205,23 +205,23 @@ increased memory usage. ...@@ -205,23 +205,23 @@ increased memory usage.
`PushOutDot1`
-------------
This is another rewrite that attempts to detect certain patterns of
computation in a `Scan`\ `Op`'s inner function and move this computation to the
outer graph.
`ScanInplaceOptimizer`
----------------------
This rewrite attempts to make `Scan` compute its recurrent outputs inplace
on the input tensors that contain their initial states. This rewrite can
improve runtime performance as well as reduce memory usage.
`ScanSaveMem`
-------------
This rewrite attempts to determine if a `Scan` node, during its execution,
for any of its outputs, can get away with allocating a memory buffer that is
large enough to contain some of the computed timesteps of that output but not
all of them.
...@@ -233,7 +233,7 @@ need to store the most recent ``N`` values, not all of them.
For instance, if a `Scan` node has a SITSOT output (last computed value is
fed back as an input at the next iteration) and only the last timestep of
that output is ever used in the outer function, the `ScanSaveMem` rewrite
could determine that there is no need to store all computed timesteps for
that SITSOT output. Only the most recently computed timestep ever needs to
be kept in memory.
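The buffer shrinkage can be sketched in pure Python (hypothetical helper names, not Aesara internals): when only the last value of a SITSOT recurrence is used, a buffer of size one produces the same answer as storing every timestep.

```python
def scan_full(x0, n):
    # Naive version: store every timestep of the SITSOT output.
    out = [x0]
    for t in range(n):
        out.append(0.5 * out[t] + 1.0)
    return out[-1]

def scan_save_mem(x0, n):
    # Buffer of size one: only the most recent timestep is kept,
    # which is all a caller that uses only the last value needs.
    buf = x0
    for _ in range(n):
        buf = 0.5 * buf + 1.0
    return buf

assert scan_full(3.0, 50) == scan_save_mem(3.0, 50)
```

The memory footprint drops from ``n + 1`` stored values to a single one, without changing the result.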
...@@ -242,11 +242,11 @@ be kept in memory.
`ScanMerge`
-----------
This rewrite attempts to fuse distinct `Scan` nodes into a single `Scan` node
that performs all the computation. The main advantage of merging `Scan` nodes
together comes from the possibility of both original `Scan`\ `Op`\s having some
computation in common. In such a setting, this computation ends up being done
twice. The fused `Scan` node, however, would only need to do it once and could
therefore be more computationally efficient. Also, since every `Scan` node
involves a certain overhead, at runtime, reducing the number of `Scan` nodes in
the graph can improve performance.
...@@ -255,7 +255,7 @@ the graph can improve performance.
`scan_merge_inouts`
-------------------
This rewrite attempts to merge a `Scan` node's identical outer inputs as well
as merge its identical outer outputs (outputs that perform the same
computation on the same inputs). This can reduce the amount of computation as
well as result in a simpler graph for both the inner function and the outer
...@@ -267,7 +267,7 @@ Helper classes and functions
Because of the complexity involved in dealing with `Scan`, a large number of
helper classes and functions have been developed over time to implement
operations commonly needed when dealing with the `Scan`\ `Op`. The `Scan`\ `Op`
itself defines a large number of them and others can be found in the file
``utils.py``. This section aims to point out the most useful ones, sorted
by usage.
...
...@@ -25,7 +25,7 @@ simple function:
def sum_square_difference(a, b):
    return at.sum((a - b)**2)
Even without taking Aesara's rewrites into account, it is likely
to work just as well as a custom implementation. It also supports all
data types, tensors of all dimensions as well as broadcasting, whereas
a custom implementation would probably only bother to support
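As a sketch of the claim above, a NumPy version of the same one-liner (NumPy here stands in for Aesara's ``at`` namespace) already handles scalars, higher-rank tensors, and broadcast operands without any extra code:

```python
import numpy as np

def sum_square_difference(a, b):
    # Same expression as above, written with NumPy instead of
    # Aesara's `at` namespace.
    return np.sum((a - b) ** 2)

# Works for scalars, matrices, and broadcast shapes alike.
a = np.arange(6.0).reshape(2, 3)
b = np.ones(3)  # broadcast against each row of `a`

assert sum_square_difference(2.0, 0.5) == 2.25
assert sum_square_difference(a, b) == np.sum((a - b) ** 2)
```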
...
...@@ -5,7 +5,7 @@
===============
The :class:`Type` class is used to provide "static" information about the types of
:class:`Variable`\s in an Aesara graph. This information is used for graph rewrites
and compilation to languages with typing that's stricter than Python's.
The types handled by Aesara naturally overlap a lot with NumPy, but
...@@ -311,7 +311,7 @@ default values.
Optional. Only needed to profile the memory of this :class:`Type` of object.
:param shape_info: the output of the call to `get_shape_info`
:return: the number of bytes taken by the object described by
    ``shape_info``.
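As a hedged sketch, assuming `get_shape_info` records a ``(shape, dtype)`` pair (the actual representation is up to the :class:`Type` implementer), the byte count is simply the element count times the per-element size:

```python
import numpy as np

def get_size(shape_info):
    # Hypothetical `shape_info`: a (shape, dtype) pair as recorded
    # by an assumed `get_shape_info`.  Bytes = number of elements
    # times the per-element size of the dtype.
    shape, dtype = shape_info
    n_elements = int(np.prod(shape, dtype=np.int64))  # () -> 1 element
    return n_elements * np.dtype(dtype).itemsize

assert get_size(((3, 4), "float64")) == 3 * 4 * 8
assert get_size(((), "int32")) == 4  # a scalar
```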
...@@ -324,8 +324,8 @@ For certain mechanisms, you can register functions and other such
things to plug your type into aesara's mechanisms. These are optional
but will allow people to use your type with familiar interfaces.
`transfer`
~~~~~~~~~~
To plug in additional options for the transfer target, define a
function which takes an Aesara variable and a target argument and
...@@ -388,7 +388,7 @@ when ``allow_downcast`` is False, i.e. no precision loss is allowed.
The second method we define is ``values_eq_approx``. This method
allows approximate comparison between two values respecting our :class:`Type`'s
constraints. It might happen that a rewrite changes the computation
graph in such a way that it produces slightly different variables, for
example because of numerical instability like rounding errors at the
end of the mantissa. For instance, ``a + a + a + a + a + a`` might not
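The kind of last-bit discrepancy that motivates ``values_eq_approx`` can be sketched with plain NumPy (an illustration of the rounding issue, not Aesara code):

```python
import numpy as np

a = np.float32(0.1)
summed = a + a + a + a + a + a  # six separate float32 additions
scaled = np.float32(6.0) * a   # a single multiplication

# The two results are mathematically equal but may differ in the
# last bits of the mantissa, so an exact comparison would be too
# strict; an approximate one, in the spirit of `values_eq_approx`,
# accepts both.
assert np.allclose(summed, scaled)
```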
...
...@@ -13,7 +13,7 @@ stressed enough!
Unit Testing revolves around the following principles:
* ensuring correctness: making sure that your :class:`Op`, :class:`Type` or
  rewrite works in the way you intended it to work. It is important for
  this testing to be as thorough as possible: test not only the obvious cases,
  but more importantly the corner cases which are more likely to trigger bugs
  down the line.
...
...@@ -46,28 +46,28 @@ Faster Aesara Function Compilation
Aesara function compilation can be time-consuming. It can be sped up by setting
the flag ``mode=FAST_COMPILE`` which instructs Aesara to skip most
rewrites and disables the generation of any C/CUDA code. This is useful
for quickly testing a simple idea.
If C code is necessary, the flag
``optimizer=fast_compile`` can be used instead. It instructs Aesara to
skip time-consuming rewrites but still generate C code.
Similarly, using the flag ``optimizer_excluding=inplace`` will speed up
compilation by preventing rewrites that replace operations with a
version that reuses memory where it will not negatively impact the
integrity of the operation. Such rewrites can be time-consuming.
However, using this flag will result in greater memory usage
because space must be allocated for the results which would be
unnecessary otherwise. In short, using this flag will speed up
compilation but it will also use more memory because
``optimizer_excluding=inplace`` excludes inplace rewrites,
resulting in a trade-off between speed of compilation and memory
usage.
Alternatively, if the graph is big, using the flag ``cycle_detection=fast``
will speed up the computations by removing some of the inplace
rewrites. This allows Aesara to skip a time-consuming cycle
detection algorithm. If the graph is big enough, we suggest that you use
this flag instead of ``optimizer_excluding=inplace``. It will result in a
computation time that is in between fast compile and fast run.
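For instance, the trade-offs above map to flag settings such as the following (a config fragment; this assumes the standard ``AESARA_FLAGS`` environment variable is used to pass flags, and ``my_script.py`` is a placeholder for your own program — an ``.aesararc`` file entry works as well):

```shell
# Fast compilation, Python-only evaluation:
AESARA_FLAGS='mode=FAST_COMPILE' python my_script.py

# Keep C code generation but skip slow rewrites:
AESARA_FLAGS='optimizer=fast_compile' python my_script.py

# Full rewriting, but without the inplace rewrites:
AESARA_FLAGS='optimizer_excluding=inplace' python my_script.py

# Large graphs: cheaper inplace cycle detection:
AESARA_FLAGS='cycle_detection=fast' python my_script.py
```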
...@@ -82,23 +82,23 @@ garbage collection will keep all intermediate results' memory space to allow to
reuse them during the next call to the same Aesara function, if they are of the
correct shape. The shape could change if the shapes of the inputs change.
.. _unsafe_rewrites:
Unsafe Rewrites
===============
Some Aesara rewrites make the assumption that the user inputs are
valid. What this means is that if the user provides invalid values (like
incompatible shapes or indexing values that are out of bounds) and
the rewrites are applied, the user error will get lost. Most of the
time, the assumption is that the user inputs are valid. So it is good
to have the rewrite applied, but losing the error is bad.
Newer rewrites in Aesara that make such an assumption will add an
assertion in the graph to preserve the user error message. Computing
these assertions could take some time. If you are sure everything is valid
in your graph and want the fastest possible Aesara, you can enable a
rewrite that will remove those assertions with:
``optimizer_including=local_remove_all_assert``
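A pure-Python analogy of what removing the assertions trades away (the actual rewrite operates on assertion nodes in the graph, not on Python ``assert`` statements):

```python
def checked_take(xs, i):
    # Graph with the safety assertion kept: invalid input fails
    # with the user-facing error message.
    assert 0 <= i < len(xs), f"index {i} out of bounds for length {len(xs)}"
    return xs[i]

def unchecked_take(xs, i):
    # What removing all assertions effectively leaves behind: no
    # checking overhead, but an invalid `i` gives a raw error, or
    # even a silently wrong result (e.g. Python's negative indexing).
    return xs[i]

assert checked_take([1, 2, 3], 2) == unchecked_take([1, 2, 3], 2) == 3
```

On valid inputs the two behave identically, which is why the rewrite is safe exactly when the "inputs are valid" assumption holds.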
...
...@@ -68,13 +68,13 @@ Glossary
:term:`Type`, or read more about :ref:`graphstructures`.
Destructive
An :term:`Op` is destructive (of particular inputs) if its
computation requires that one or more inputs be overwritten or
otherwise invalidated. For example, :term:`inplace`\ :class:`Op`\s are
destructive. Destructive :class:`Op`\s can sometimes be faster than
non-destructive alternatives. Aesara encourages users not to put
destructive :class:`Op`\s into graphs that are given to :term:`aesara.function`,
but instead to trust the rewrites to insert destructive :class:`Op`\s
judiciously.
Destructive :class:`Op`\s are indicated via an :attr:`Op.destroy_map` attribute. (See
...@@ -90,14 +90,16 @@ Glossary
every element, this is an inplace operation because when you are done,
the original input has been overwritten. :class:`Op`\s representing inplace
computations are :term:`destructive`, and by default these can only be
inserted by rewrites, not user code.
Linker
A :class:`Linker` instance responsible for "running" the compiled
function. Among other things, the linker determines whether
computations are carried out with C or Python code.
Mode
A :class:`Mode` instance specifying an :term:`optimizer` and a :term:`linker` that is
passed to :term:`aesara.function`. It parametrizes how an expression
graph is converted to a callable object.
...@@ -120,12 +122,6 @@ Glossary
An instance of a :term:`rewriter` that has the capacity to provide
an improvement to the performance of a graph.
Optimization
A :term:`graph` transformation applied by an :term:`optimizer` during
the compilation of a :term:`graph` by :term:`aesara.function`. These
are graph rewrites that are intended to improve the performance of
a compiled :term:`Graph`.
Pure
An :term:`Op` is *pure* if it has no :term:`destructive` side-effects.
...
...@@ -2,14 +2,20 @@
Welcome
=======
Aesara is a Python library that allows you to define, optimize/rewrite, and
evaluate mathematical expressions involving multi-dimensional arrays
efficiently.
Some of Aesara's features are:
* **Tight integration with NumPy**
  - Use `numpy.ndarray` in Aesara-compiled functions
* **Efficient symbolic differentiation**
  - Aesara efficiently computes your derivatives for functions with one or many inputs
* **Speed and stability optimizations**
  - Get the right answer for ``log(1 + x)`` even when ``x`` is near zero
* **Dynamic C/JAX/Numba code generation**
  - Evaluate expressions faster
Aesara is based on `Theano`_, which has been powering large-scale computationally
intensive scientific investigations since 2007.
...
...@@ -5,28 +5,28 @@
Aesara at a Glance
==================
Aesara is a Python library that allows one to define, optimize/rewrite, and
evaluate mathematical expressions, especially ones involving multi-dimensional
arrays (e.g. :class:`numpy.ndarray`\s). Using Aesara, it is possible to attain
speeds rivaling hand-crafted C implementations for problems involving large
amounts of data.
Aesara combines aspects of a computer algebra system (CAS) with aspects of an
optimizing compiler. It can also generate customized code for multiple compiled
languages and/or their Python-based interfaces, such as C, Numba, and JAX. This
combination of CAS features with optimizing compilation and transpilation
is particularly useful for tasks in which complicated mathematical expressions
are evaluated repeatedly and evaluation speed is critical. For situations
where many different expressions are each evaluated once, Aesara can minimize
the amount of compilation and analysis overhead, but still provide symbolic
features such as automatic differentiation.
Aesara's compiler applies many default optimizations of varying
complexity. These optimizations include, but are not limited to:
* constant folding
* merging of similar sub-graphs, to avoid redundant calculations
* arithmetic simplifications (e.g. ``x * y / x -> y``, ``-(-x) -> x``)
* inserting efficient BLAS_ operations (e.g. ``GEMM``) in a variety of
  contexts
* using memory aliasing to avoid unnecessary calculations
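The first three kinds of rewrites can be illustrated with a toy rewriter over a hand-rolled tuple representation of expressions (an illustrative sketch only; Aesara's rewriter operates on `Apply` graphs, not tuples):

```python
def simplify(expr):
    """One bottom-up pass of toy rewrites over a tuple expression.

    An expression is ("mul", a, b), ("div", a, b), ("neg", a),
    a variable name (str), or a number.
    """
    if not isinstance(expr, tuple):
        return expr  # a variable name or a constant
    op = expr[0]
    args = [simplify(a) for a in expr[1:]]
    # Constant folding: evaluate now when every operand is a number.
    if all(isinstance(a, (int, float)) for a in args):
        fold = {"mul": lambda a, b: a * b,
                "div": lambda a, b: a / b,
                "neg": lambda a: -a}
        return fold[op](*args)
    # Arithmetic simplification: x * y / x -> y (either operand order).
    if op == "div" and isinstance(args[0], tuple) and args[0][0] == "mul":
        num, den = args
        if num[1] == den:
            return num[2]
        if num[2] == den:
            return num[1]
    # Arithmetic simplification: -(-x) -> x.
    if op == "neg" and isinstance(args[0], tuple) and args[0][0] == "neg":
        return args[0][1]
    return (op, *args)

assert simplify(("mul", 2, 3)) == 6                      # constant folding
assert simplify(("div", ("mul", "x", "y"), "x")) == "y"  # x * y / x -> y
assert simplify(("neg", ("neg", "x"))) == "x"            # -(-x) -> x
```

A real rewriter repeats such passes to a fixed point and must also prove the rewrites safe (e.g. ``x != 0`` concerns for the division rule), which this sketch ignores.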
...@@ -37,7 +37,7 @@ limited to:
For more information see :ref:`optimizations`.
Theano
------
The library that Aesara is based on, Theano, was written at the LISA lab to support rapid development of efficient machine learning algorithms. While Theano was commonly referred to as a "deep learning" (DL) library, Aesara is not a DL library.
...
...@@ -181,7 +181,7 @@ Reference
and update the implicit function arguments according to the `updates`.
Inputs can be given as variables or :class:`In` instances.
:class:`In` instances also have a variable, but they attach some extra
information about how call-time arguments corresponding to that variable
should be used. Similarly, :class:`Out` instances can attach information
...@@ -189,28 +189,28 @@ Reference
The default is typically 'FAST_RUN' but this can be changed in
:doc:`aesara.config <../config>`. The mode
argument controls the sort of rewrites that will be applied to the
graph, and the way the rewritten graph will be evaluated.
After each function evaluation, the `updates` mechanism can replace the
value of any (implicit) `SharedVariable` inputs with new values computed
from the expressions in the `updates` list. An exception will be raised
if you give two update expressions for the same `SharedVariable` input (that
doesn't make sense).
If a `SharedVariable` is not given an update expression, but has a
:attr:`Variable.default_update` member containing an expression, this expression
will be used as the update expression for this variable. Passing
``no_default_updates=True`` to ``function`` disables this behavior
entirely; passing ``no_default_updates=[sharedvar1, sharedvar2]``
disables it for the mentioned variables.
Regarding givens: Be careful to make sure that these substitutions are
independent, because behaviour when ``Var1`` of one pair appears in the graph leading
to ``Var2`` in another expression is undefined (e.g. with ``{a: x, b: a + 1}``).
Replacements specified with givens are different from replacements that
occur during normal rewriting, in that ``Var2`` is not expected to be
equivalent to ``Var1``.
.. autofunction:: aesara.compile.function.function_dump
...
...@@ -18,8 +18,8 @@ inputs-to-outputs graph is transformed into a callable object.
Aesara defines the following modes by name:
- ``'FAST_COMPILE'``: Apply just a few graph rewrites and only use Python implementations.
- ``'FAST_RUN'``: Apply all rewrites, and use C implementations where possible.
- ``'DebugMode'``: A mode for debugging. See :ref:`DebugMode <debugmode>` for details.
- ``'NanGuardMode'``: :ref:`NaN detector <nanguardmode>`
- ``'DEBUG_MODE'``: Deprecated. Use the string DebugMode.
...@@ -30,7 +30,7 @@ overridden by passing the keyword argument to :func:`aesara.function`.
.. TODO::
    For a finer level of control over which rewrites are applied, and whether
    C or Python implementations are used, read.... what exactly?
...@@ -43,9 +43,9 @@ Reference
.. class:: Mode(object)
Compilation is controlled by two attributes: the :attr:`optimizer` controls how
an expression graph will be transformed; the :attr:`linker` controls how the
rewritten expression graph will be evaluated.
.. attribute:: optimizer
...@@ -57,15 +57,15 @@ Reference
.. method:: including(*tags)
Return a new :class:`Mode` instance like this one, but with its
:attr:`optimizer` modified by including the given tags.
.. method:: excluding(*tags)
Return a new :class:`Mode` instance like this one, but with its
:attr:`optimizer` modified by excluding the given tags.
.. method:: requiring(*tags)
Return a new :class:`Mode` instance like this one, but with its
:attr:`optimizer` modified by requiring the given tags.
...@@ -2,22 +2,22 @@
.. _opfromgraph:
=============
`OpFromGraph`
=============
This page describes :class:`aesara.compile.builders.OpFromGraph
<aesara.compile.builders.OpFromGraph>`, an `Op` constructor that allows one to
encapsulate an Aesara graph in a single `Op`.
This can be used to encapsulate some functionality in one block. It is
useful for scaling Aesara compilation to bigger graphs when we
reuse that encapsulated functionality with different inputs many
times. Due to this encapsulation, it can make Aesara's compilation phase
faster for graphs with many nodes.
Using this for small graphs is not recommended as it disables
rewrites between what is inside the encapsulation and outside of it.
.. note::
......
...@@ -170,8 +170,8 @@ import ``aesara`` and print the config variable, as in:

    Default: ``True``

    This enables, or disables, a rewrite in :class:`Scan` that tries to
    pre-allocate memory for its outputs. Enabling the rewrite can give a
    significant speed-up at the cost of slightly increased memory usage.

.. attribute:: config.scan__allow_gc
...@@ -202,10 +202,10 @@ import ``aesara`` and print the config variable, as in:

    Default: ``off``

    This is a flag for checking stack traces during graph rewriting.
    If :attr:`check_stack_trace` is set to ``off``, no check is performed on the
    stack trace. If :attr:`check_stack_trace` is set to ``log`` or ``warn``, a
    dummy stack trace is inserted that indicates which rewrite inserted the
    variable that had an empty stack trace; when ``warn`` is set, a warning
    is also printed.
    If :attr:`check_stack_trace` is set to ``raise``, an exception is raised if a
...@@ -315,7 +315,7 @@ import ``aesara`` and print the config variable, as in:

    Default: ``False``

    When ``True``, the :class:`VM` and :class:`CVM` linkers profile the
    rewriting phase when compiling an Aesara function. This only works when
    ``profile=True``.

.. attribute:: config.profiling__n_apply
...@@ -398,7 +398,7 @@ import ``aesara`` and print the config variable, as in:

    Default: ``'fast_run'``

    When the mode is ``'Mode'``, this sets the default rewrites used during
    compilation.

.. attribute:: on_opt_error

...@@ -406,8 +406,8 @@ import ``aesara`` and print the config variable, as in:

    Default: ``'warn'``

    When a crash occurs while trying to apply a rewrite, either warn the
    user and skip the rewrite (i.e. ``'warn'``), raise the exception
    (i.e. ``'raise'``), drop into the ``pdb`` debugger (i.e. ``'pdb'``), or
    ignore it (i.e. ``'ignore'``).
    We suggest never using ``'ignore'`` except during testing.
...@@ -503,9 +503,9 @@ import ``aesara`` and print the config variable, as in:

    When ``True``, add asserts that highlight shape errors.

    Without such asserts, the underlying rewrite could hide errors in user
    code. Aesara adds the asserts only if it cannot infer that the shapes are
    equivalent. When it can determine equivalence, this rewrite does not
    introduce an assert.

    Removing these asserts can speed up execution.
...@@ -653,11 +653,11 @@ import ``aesara`` and print the config variable, as in:

    Default: ``""``

    A list of rewrite tags that shouldn't be included in the default ``Mode``.
    If multiple tags are provided, separate them with ``':'``.
    For example, to remove the ``Elemwise`` in-place rewrites,
    use the flag ``optimizer_excluding:inplace_opt``, where
    ``inplace_opt`` is the name of the rewrite group.

    This flag's value cannot be modified during program execution.
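Because the flag cannot be changed at runtime, it is typically set through the environment when launching a program. A minimal sketch (``my_script.py`` is a placeholder for your own program, and the ``key=value`` form inside ``AESARA_FLAGS`` is the usual way such config flags are passed):

```shell
# Exclude the in-place Elemwise rewrite group for a single run
AESARA_FLAGS='optimizer_excluding=inplace_opt' python my_script.py
```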
...@@ -665,7 +665,7 @@ import ``aesara`` and print the config variable, as in:

    Default: ``""``

    A list of rewrite tags to be included in the default ``Mode``.
    If multiple tags are provided, separate them with ``':'``.

    This flag's value cannot be modified during program execution.
...@@ -674,7 +674,7 @@ import ``aesara`` and print the config variable, as in:

    Default: ``""``

    A list of rewrite tags that are required for rewriting in the default
    ``Mode``.
    If multiple tags are provided, separate them with ``':'``.
...@@ -686,7 +686,7 @@ import ``aesara`` and print the config variable, as in:

    Default: ``False``

    When ``True``, print the rewrites applied to stdout.

.. attribute:: nocleanup
...@@ -792,7 +792,7 @@ import ``aesara`` and print the config variable, as in:

    Setting this attribute to something other than ``'off'`` activates a
    debugging mechanism in which Aesara executes the graph on-the-fly, as it
    is being built. This allows the user to spot errors early on (such as
    dimension mismatches) **before** rewrites are applied.

    Aesara will execute the graph using constants and/or shared variables
    provided by the user. Purely symbolic variables (e.g. ``x =
...@@ -809,8 +809,8 @@ import ``aesara`` and print the config variable, as in:

.. attribute:: compute_test_value_opt

    As ``compute_test_value``, but this is the value used during Aesara's
    rewriting phase. It helps debug shape errors in Aesara's rewrites.

.. attribute:: print_test_value
...@@ -898,21 +898,21 @@ import ``aesara`` and print the config variable, as in:

    Int value, default: 0

    The verbosity level of the meta-rewriter: ``0`` for silent, ``1`` to warn
    only when Aesara cannot meta-rewrite an :class:`Op`, ``2`` for full output
    (e.g. timings and the rewrites selected).

.. attribute:: config.metaopt__optimizer_excluding

    Default: ``""``

    A list of rewrite tags that we don't want included in the meta-rewriter.
    Multiple tags are separated by ``':'``.

.. attribute:: config.metaopt__optimizer_including

    Default: ``""``

    A list of rewrite tags to be included during meta-rewriting.
    Multiple tags are separated by ``':'``.
...@@ -33,7 +33,7 @@ hello world __str__ = [ 1. 2. 3.]

If you print more than one thing in a function like `f`, they will not
necessarily be printed in the order that you expect. The order might even
depend on which graph rewrites are applied. Strictly speaking, the order of
printing is not completely defined by the interface --
the only hard rule is that if the input of some print output `a` is
ultimately used as an input to some other print input `b` (so that `b` depends on `a`),
...@@ -56,7 +56,7 @@ Aesara also provides :func:`aesara.printing.pydotprint` that creates a png image

>>> x = at.dscalar('x')
>>> y = x ** 2
>>> gy = grad(y, x)
>>> pp(gy)  # print out the gradient prior to rewriting
'((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'
>>> f = function([x], gy)
>>> pp(f.maker.fgraph.outputs[0])
......
...@@ -81,7 +81,7 @@ Scan returns a tuple containing our result (``result``) and a

dictionary of updates (empty in this case). Note that the result
is not a matrix, but a 3D tensor containing the value of ``A**k`` for
each step. We want the last value (after ``k`` steps), so we compile
a function to return just that. Note that there is a rewrite that,
at compile time, will detect that you are using just the last value of the
result and ensure that scan does not store all the intermediate values
it computes. So do not worry if ``A`` and ``k`` are large.
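As a plain-NumPy sketch of what this scan loop computes (not Aesara code; the helper name is made up), each step multiplies the accumulator by ``A`` elementwise, every intermediate power is produced, and only the last one is actually needed:

```python
import numpy as np

def scan_power(A, k):
    """Mimic scan's repeated elementwise multiply: step i holds A**(i+1)."""
    acc = np.ones_like(A)
    steps = []
    for _ in range(k):
        acc = acc * A            # the body of the scanned function
        steps.append(acc.copy())
    return np.stack(steps)       # scan returns all steps stacked together

result = scan_power(np.array([2.0, 3.0]), 3)
final = result[-1]               # A**k, the only value we keep
```

The rewrite mentioned above lets Aesara avoid materializing the full ``steps`` stack when only ``result[-1]`` is used.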
...@@ -341,7 +341,7 @@ function applied at each step) you do not need to pass them as arguments.

Scan will find them on its own and add them to the graph. However, passing
them to the scan function is good practice, as it avoids the Scan Op calling
any earlier (external) Op over and over. This results in a simpler
computational graph, which speeds up the rewriting and the execution. To pass
the shared variables to Scan you need to put them in a list and give it to
the ``non_sequences`` argument. Here is the Gibbs sampling code updated:
...@@ -381,7 +381,7 @@ Using shared variables - the strict flag
----------------------------------------

As we just saw, passing the shared variables to scan may result in a simpler
computational graph, which speeds up the rewriting and the execution. A
good way to remember to pass every shared variable used during scan is to use
the ``strict`` flag. When set to true, scan checks that all the necessary shared
variables in ``fn`` are passed as explicit arguments to ``fn``. This has to be
...@@ -599,8 +599,8 @@ about 6x slower than the forward, a ~20% slowdown is expected. Apart from the

is similar to the classic ``scan`` function.

Improving Scan's performance
----------------------------

This section covers some ways to improve the performance of an Aesara
function using Scan.
...@@ -645,29 +645,29 @@ is not provided for this argument, the value of the flag

``config.scan__allow_gc`` is used).

Graph Rewrites
^^^^^^^^^^^^^^

This one is simple but still worth pointing out. Aesara is able to
automatically recognize and rewrite many computation patterns. However, there
are patterns that Aesara doesn't rewrite because doing so would change the
user interface (such as merging shared variables together into a single one,
for instance). Additionally, Aesara doesn't catch every case that it could
rewrite, so it remains important for performance that the user define an
efficient graph in the first place. This is also the case, and sometimes even
more so, for the graph inside of Scan, because it will be executed
many times for every execution of the Aesara function that contains it.

The `LSTM tutorial <http://deeplearning.net/tutorial/lstm.html>`_ on
`DeepLearning.net <http://deeplearning.net>`_ provides an example of a
rewrite that Aesara cannot perform. Instead of performing many matrix
multiplications between the matrix :math:`x_t` and each of the shared matrices
:math:`W_i`, :math:`W_c`, :math:`W_f` and :math:`W_o`, the matrices
:math:`W_*` are merged into a single shared matrix :math:`W` and the graph
performs a single larger matrix multiplication between :math:`W` and
:math:`x_t`. The resulting matrix is then sliced to obtain the results that
the small individual matrix multiplications would have produced. This
rewrite replaces several small and inefficient matrix multiplications with
a single larger one, and thus improves performance at the cost of potentially
higher memory usage.
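The merge-and-slice trick above can be sketched in plain NumPy (the sizes and names are illustrative, not taken from the tutorial): the four small products and the single sliced product are numerically identical.

```python
import numpy as np

rng = np.random.default_rng(0)
n_in, n_hid = 4, 3
x_t = rng.normal(size=(1, n_in))
# Four separate weight matrices, as in the naive graph
W_i, W_c, W_f, W_o = (rng.normal(size=(n_in, n_hid)) for _ in range(4))

# Naive form: four small matrix multiplications
separate = [x_t @ W for W in (W_i, W_c, W_f, W_o)]

# Rewritten form: one merged matrix, a single larger multiplication, then slices
W_merged = np.concatenate([W_i, W_c, W_f, W_o], axis=1)  # shape (n_in, 4 * n_hid)
combined = x_t @ W_merged
sliced = [combined[:, i * n_hid:(i + 1) * n_hid] for i in range(4)]
```

The trade-off is visible in the code: ``W_merged`` stores all four matrices contiguously, which costs memory but lets one large GEMM replace four small ones.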
......
...@@ -231,18 +231,18 @@ List of Implemented Operations

- :func:`sampling_dot <aesara.sparse.basic.sampling_dot>`.

  - Both inputs must be dense.
  - The grad implemented is structured for ``p``.
  - Sample of the dot and sample of the gradient.
  - C code for perform but not for grad.
  - Returns sparse for perform and grad.

- :func:`usmm <aesara.sparse.basic.usmm>`.

  - You *shouldn't* insert this op yourself!
  - There is a rewrite that transforms a
    :func:`dot <aesara.sparse.basic.dot>` to :class:`Usmm` when possible.
  - This :class:`Op` is the equivalent of gemm for sparse dot.
  - There is no grad implemented for this :class:`Op`.
  - One of the inputs must be sparse; the other may be sparse or dense.
  - Returns a dense result from perform.
......
...@@ -1199,7 +1199,7 @@ Bitwise

Inplace
-------

In-place operators are *not* supported. Aesara's graph rewrites
will determine which intermediate values to use for in-place
computations. If you would like to update the value of a
:term:`shared variable`, consider using the ``updates`` argument to
......
===================================================================
:mod:`tensor.basic_opt` -- Tensor Rewrites
===================================================================

.. module:: tensor.basic_opt
   :platform: Unix, Windows
   :synopsis: Tensor Rewrites
.. moduleauthor:: LISA, PyMC Developers, Aesara Developers

.. automodule:: aesara.tensor.basic_opt
......
==============================================================
:mod:`tensor.math_opt` -- Tensor Rewrites for Math Operations
==============================================================

.. module:: tensor.math_opt
   :platform: Unix, Windows
   :synopsis: Tensor Rewrites for Math Operations
.. moduleauthor:: LISA, PyMC Developers, Aesara Developers

.. automodule:: aesara.tensor.math_opt
......
...@@ -61,45 +61,44 @@

.. function:: ultra_fast_sigmoid(x)

    Returns an approximate standard :func:`sigmoid` nonlinearity applied to ``x``.

    :Parameters: ``x`` - symbolic Tensor (or compatible)
    :Return type: same as ``x``
    :Returns: approximated element-wise sigmoid: :math:`sigmoid(x) = \frac{1}{1 + \exp(-x)}`.
    :note: To automatically change all :func:`sigmoid`\ :class:`Op`\s to this version, use
        the Aesara rewrite `local_ultra_fast_sigmoid`. This can be done
        with the Aesara flag ``optimizer_including=local_ultra_fast_sigmoid``.
        This rewrite is applied late, so it should not affect stabilization rewrites.

    .. note:: The underlying code will return 0.00247262315663 as the
        minimum value and 0.997527376843 as the maximum value, so it
        never returns exactly 0 or 1.

    .. note:: Using `ultra_fast_sigmoid` directly in the graph will
        disable the stabilization rewrites associated with it, but
        using the rewrite to insert it won't disable the
        stability rewrites.
.. function:: hard_sigmoid(x)

    Returns an approximate standard :func:`sigmoid` nonlinearity applied to ``x``.

    :Parameters: ``x`` - symbolic Tensor (or compatible)
    :Return type: same as ``x``
    :Returns: approximated element-wise sigmoid: :math:`sigmoid(x) = \frac{1}{1 + \exp(-x)}`.
    :note: To automatically change all :func:`sigmoid`\ :class:`Op`\s to this version, use
        the Aesara rewrite `local_hard_sigmoid`. This can be done
        with the Aesara flag ``optimizer_including=local_hard_sigmoid``.
        This rewrite is applied late, so it should not affect
        stabilization rewrites.

    .. note:: The underlying code will return an exact 0 or 1 if an
        element of ``x`` is too small or too big.

    .. note:: Using `hard_sigmoid` directly in the graph will
        disable the stabilization rewrites associated with it, but
        using the rewrite to insert it won't disable the
        stability rewrites.
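As a rough NumPy sketch of this kind of piecewise-linear approximation (the ``slope`` and ``shift`` constants below are assumptions for illustration, not necessarily the ones Aesara compiles in), note how it saturates to exactly 0 and 1 at the extremes, unlike `ultra_fast_sigmoid`:

```python
import numpy as np

def hard_sigmoid(x, slope=0.2, shift=0.5):
    # Piecewise-linear stand-in for the logistic sigmoid: linear in the
    # middle, clipped to exactly 0 and 1 outside the central interval.
    return np.clip(slope * x + shift, 0.0, 1.0)

vals = hard_sigmoid(np.array([-10.0, 0.0, 10.0]))
```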
.. function:: softplus(x)
......