Move math-related optimizations to theano.tensor.math_opt

06632882 · Brandon T. Willard · Thomas Wiecki · 980451b5 · 06632882 · 06632882
--- a/doc/library/tensor/math_opt.txt
+++ b/doc/library/tensor/math_opt.txt
+===================================================================
+:mod:`tensor.math_opt` --  Tensor Optimizations for Math Operations
+===================================================================
+
+.. module:: tensor.math_opt
+   :platform: Unix, Windows
+   :synopsis: Tensor Optimizations for Math Operations
+.. moduleauthor:: LISA, PyMC Developers
+
+.. automodule:: theano.tensor.math_opt
+    :members:
--- a/doc/optimizations.txt
+++ b/doc/optimizations.txt
@@ -5,12 +5,12 @@ Optimizations
 ==============

 Theano applies many kinds of graph optimizations, with different objectives:
- * simplifying and standardizing the form of the expression graph (e.g.  :term:`merge`, :term:`add canonicalization` ), 
+ * simplifying and standardizing the form of the expression graph (e.g.  :term:`merge`, :term:`add canonicalization` ),
 * reducing the maximum memory footprint (e.g. :term:`inplace_elemwise`),
 * increasing execution speed (e.g. :term:`constant folding`).

 The optimizations are listed in roughly chronological order.  The table below
-gives a quick summary of the optimizations included in the default modes. 
+gives a quick summary of the optimizations included in the default modes.
 The descriptions are brief and point to further reading.

 If you would like to add an additional optimization, refer to
@@ -33,17 +33,17 @@ For an even faster run-time, we could disable assertions (which could be time co

        python -c "import theano; theano.compile.optdb.query(theano.compile.predefined_optimizers['<OPT_ID>']).print_summary()"

-    where <OPT_ID> can be one of o1 (:ref:`† <o1=>`), o2, o3, o4 (:ref:`* <o4=>`), 
+    where <OPT_ID> can be one of o1 (:ref:`† <o1=>`), o2, o3, o4 (:ref:`* <o4=>`),
    Stabilization or unsafe.


 ========================================================= ============== === === ================= ============= ======
-Optimization                                              o4             o3  o2  o1                Stabilization unsafe  
-                                                          :ref:`* <o4=>`         :ref:`† <o1=>` 
+Optimization                                              o4             o3  o2  o1                Stabilization unsafe
+                                                          :ref:`* <o4=>`         :ref:`† <o1=>`
 ========================================================= ============== === === ================= ============= ======
-:term:`merge`                                             x              x   x    x                              x   
-:term:`constant folding<constant folding>`                x              x   x    x                              x   
-:term:`GPU transfer`                                      x              x   x    x                              x   
+:term:`merge`                                             x              x   x    x                              x
+:term:`constant folding<constant folding>`                x              x   x    x                              x
+:term:`GPU transfer`                                      x              x   x    x                              x
 :term:`shape promotion<shape promotion>`                  x              x                                       x
 :term:`fill cut<fill cut>`                                x              x                                       x
 :term:`inc_subtensor srlz.<inc_subtensor serialization>`  x              x                                       x
@@ -59,12 +59,12 @@ Optimization                                              o4             o3  o2
 :term:`add specialize <add specialization>`               x              x                                       x
 :term:`mul specialize <mul specialization>`               x              x                                       x
 :term:`pow specialize <pow specialization>`               x              x                                       x
-:term:`inplace_setsubtensor`                              x                                                      
+:term:`inplace_setsubtensor`                              x
 :term:`gemm`                                              x              x                                       x
-:term:`inplace_elemwise`                                  x                                                          
-:term:`inplace_random`                                    x                                                      
-:term:`elemwise fusion`                                   x              x   x                                   x   
-:term:`local_log_softmax`                                 x              x                         x             x   
+:term:`inplace_elemwise`                                  x
+:term:`inplace_random`                                    x
+:term:`elemwise fusion`                                   x              x   x                                   x
+:term:`local_log_softmax`                                 x              x                         x             x
 :term:`local_remove_all_assert`                                                                                  x
 ========================================================= ============== === === ================= ============= ======

@@ -104,7 +104,7 @@ Optimization                                              o4             o3  o2

        See :func:`opt.local_shape_lift_*`

-    fill cut             
+    fill cut
        `Fill(a,b)` means to make a tensor of the shape of `a` full of the value `b`.
        Often when fills are used with elementwise operations (e.g. f) they are
        un-necessary:
@@ -113,18 +113,18 @@ Optimization                                              o4             o3  o2

        See :func:`opt.local_fill_sink`

-    inc_subtensor serialization  
+    inc_subtensor serialization
        Incrementing a small subregion of a large tensor can be done quickly
        using an inplace operation, but if two increments are being done on
        the same large tensor, then only one of them can be done inplace.
        This optimization reorders such graphs so that all increments can be
-        done inplace.  
-        
+        done inplace.
+
        ``inc_subtensor(a,b,idx) + inc_subtensor(a,c,idx) -> inc_subtensor(inc_subtensor(a,b,idx),c,idx)``

        See :func:`local_IncSubtensor_serialize`

-    reshape_chain        
+    reshape_chain
        This optimizes graphs like ``reshape(reshape(x, shape1), shape2)`` -> ``reshape(x, shape2)``

        See :func:`local_reshape_chain`
@@ -140,22 +140,22 @@ Optimization                                              o4             o3  o2
        form:

        .. math::
-            
+
            (a+b+c+...) - (z + x + y + ....)

-        See :class:`Canonizer`, :attr:`local_add_canonizer`
+        See :class:`AlgebraicCanonizer`, :attr:`local_add_canonizer`

-    mul canonicalization       
+    mul canonicalization
        Rearrange expressions of multiplication and division to a canonical
        form:

        .. math::
-            
+
            \frac{a * b * c * ...}{z * x * y * ....}

-        See :class:`Canonizer`, :attr:`local_mul_canonizer`
+        See :class:`AlgebraicCanonizer`, :attr:`local_mul_canonizer`

-    dot22                
+    dot22
        This simple optimization replaces dot(matrix, matrix) with a special
        `dot22` op that only works for matrix multiplication.  This op is
        implemented with a call to GEMM, and sometimes replaced entirely by
@@ -163,63 +163,63 @@ Optimization                                              o4             o3  o2

        See :func:`local_dot_to_dot22`

-    sparse_dot           
+    sparse_dot
        Theano has a sparse matrix multiplication algorithm that is faster in
        many cases than scipy's (for dense matrix output).  This optimization
        swaps scipy's algorithm for ours.

        See :func:`local_structured_dot`

-    sum_scalar_mul       
+    sum_scalar_mul
        This optimizes graphs like ``sum(scalar * tensor)`` -> ``scalar * sum(tensor)``

        See :func:`local_sum_mul_by_scalar`

-    neg_neg              
+    neg_neg
        Composition of two negatives can be cancelled out.

        See :func:`local_neg_neg`

-    neg_div_neg          
+    neg_div_neg
        Matching negatives in both the numerator and denominator can both be removed.

        See :func:`local_neg_div_neg`

-    add specialization       
+    add specialization
        This optimization simplifies expressions involving the addition of
        zero.
-        
+
        See :func:`local_add_specialize`

-    mul specialization       
+    mul specialization
        Several special cases of mul() exist, and this optimization tries to
        recognize them. Some examples include:
        * ``mul(x,x)`` -> ``x**2``
        * ``mul(x,0)`` -> ``zeros_like(x)``
        * ``mul(x, -1)`` -> ``neg(x)``
-        
+
        See :func:`local_mul_specialize`

-    pow specialization       
+    pow specialization
        Several special cases of pow() exist, and this optimization tries to
        recognize them. Some examples include:
        * ``pow(x,2)`` -> ``x**2``
        * ``pow(x,0)`` -> ``ones_like(x)``
        * ``pow(x, -0.5)`` -> ``inv(sqrt(x))``
-        
+
        See :func:`local_pow_specialize`
-        

-    inplace_setsubtensor 
+
+    inplace_setsubtensor
        In order to be a pure Op, setsubtensor must copy its entire input, and
        modify just the subtensor in question (possibly a single element).  It
        is much more efficient to modify that element inplace.

        See :func:`local_inplace_setsubtensor`

-    gemm                 
+    gemm
        Numerical libraries such as MKL and ATLAS implement the BLAS-level-3
-        interface, and provide a function `GEMM` that implements 
+        interface, and provide a function `GEMM` that implements
        :math:`Z \leftarrow \alpha A \cdot B + \beta Z`, for matrices `A`, `B`
        and `Z`, and scalars :math:`\alpha, \beta`.

@@ -237,14 +237,14 @@ Optimization                                              o4             o3  o2

        See :func:`insert_inplace_optimizer`

-    inplace_random       
+    inplace_random
        Typically when a graph uses random numbers, the RandomState is stored
        in a shared variable, used once per call and, updated after each function
        call.  In this common case, it makes sense to update the random number generator in-place.

        See :func:`random_make_inplace`

-    elemwise fusion 
+    elemwise fusion
        This optimization compresses subgraphs of computationally cheap
        elementwise operations into a single Op that does the whole job in a
        single pass over the inputs (like loop fusion).  This is a win when
@@ -260,7 +260,7 @@ Optimization                                              o4             o3  o2
        a graph copying data from GPU to CPU in order to evaluate an
        expression that could have been evaluated on the GPU, we substitute
        the GPU version of that Op for the CPU version.  Likewise if we are
-        copying the output of a Op with a GPU implementation to the GPU, 
+        copying the output of an Op with a GPU implementation to the GPU,
        then we substitute the GPU version for the CPU version.  In this way, if all goes well,
        this procedure will result in a graph with the following form:

@@ -286,6 +286,5 @@ Optimization                                              o4             o3  o2
 	setting ``optimizer_including=local_remove_all_assert`` which will
 	remove all assertions in the graph for checking user inputs are valid.
        Use this optimization if you are sure everything is valid in your graph.
-	
-	See :ref:`unsafe_optimization`

+	See :ref:`unsafe_optimization`
--- a/tests/tensor/test_opt.py
+++ b/tests/tensor/test_opt.py
@@ -47,11 +47,8 @@ from theano.tensor.basic_opt import (
    MakeVector,
    ShapeFeature,
    assert_op,
-    local_add_specialize,
    local_canonicalize_alloc,
    local_dimshuffle_lift,
-    local_greedy_distributor,
-    local_lift_transpose_through_dot,
    local_merge_alloc,
    local_reshape_to_dimshuffle,
    local_useless_alloc,
@@ -59,7 +56,6 @@ from theano.tensor.basic_opt import (
    local_useless_elemwise,
    local_useless_reshape,
    make_vector,
-    mul_canonizer,
    register_specialize,
 )
 from theano.tensor.blas import Dot22, Gemv
@@ -109,6 +105,12 @@ from theano.tensor.math import round as tt_round
 from theano.tensor.math import sgn, sin, sinh, sqr, sqrt, sub
 from theano.tensor.math import sum as tt_sum
 from theano.tensor.math import tan, tanh, true_div, xor
+from theano.tensor.math_opt import (
+    local_add_specialize,
+    local_greedy_distributor,
+    local_lift_transpose_through_dot,
+    mul_canonizer,
+)
 from theano.tensor.nnet.sigm import softplus
 from theano.tensor.shape import Reshape, Shape_i, SpecifyShape, reshape, specify_shape
 from theano.tensor.subtensor import (
@@ -465,7 +467,7 @@ class TestCanonize:
        print(pprint(g.outputs[0]))

    def test_elemwise_multiple_inputs_optimisation(self):
-        # verify that the Canonizer merge sequential Elemwise({mul,add}) part 1
+        # verify that the AlgebraicCanonizer merges sequential Elemwise({mul,add}) part 1
        #
        # This part are that case that is done, but don't include case
        # that are not implemented but are supposed to be.
@@ -574,8 +576,8 @@ class TestCanonize:
        ]  # [10:11]
        # print cases

-        # We must be sure that the Canonizer is working, but that we don't have other
-        # optimisation that could hide bug in the Canonizer as local_elemwise_fusion
+        # We must be sure that the AlgebraicCanonizer is working, but that we don't have other
+        # optimisations that could hide bugs in the AlgebraicCanonizer, such as local_elemwise_fusion
        mode = get_default_mode()
        opt = Query(["canonicalize"])
        opt = opt.excluding("local_elemwise_fusion")
@@ -595,11 +597,11 @@ class TestCanonize:
            assert out_dtype == out.dtype

    @pytest.mark.skip(
-        reason="Current implementation of Canonizer does not "
+        reason="Current implementation of AlgebraicCanonizer does not "
        "implement all cases. Skip the corresponding test."
    )
    def test_elemwise_multiple_inputs_optimisation2(self):
-        # verify that the Canonizer merge sequential Elemwise({mul,add}) part 2.
+        # verify that the AlgebraicCanonizer merges sequential Elemwise({mul,add}) part 2.
        # This part are that case that should have been done, but that are not implemented.
        # Test with and without DimShuffle

@@ -709,8 +711,8 @@ class TestCanonize:
        ]  # [10:11]
        # print cases

-        # We must be sure that the Canonizer is working, but that we don't have other
-        # optimisation that could hide bug in the Canonizer as local_elemwise_fusion
+        # We must be sure that the AlgebraicCanonizer is working, but that we don't have other
+        # optimisations that could hide bugs in the AlgebraicCanonizer, such as local_elemwise_fusion
        mode = get_default_mode()
        mode._optimizer = Query(["canonicalize"])
        mode._optimizer = mode._optimizer.excluding("local_elemwise_fusion")
@@ -728,7 +730,7 @@ class TestCanonize:

    @pytest.mark.slow
    def test_multiple_case(self):
-        # test those case take from the comment in Canonizer
+        # test those cases taken from the comment in AlgebraicCanonizer
        # x / x -> 1
        # (x * y) / x -> y
        # x / y / x -> 1 / y
@@ -756,8 +758,8 @@ class TestCanonize:
        dwv = _asarray(np.random.rand(*shp), dtype="float64")
        dvv = _asarray(np.random.rand(shp[0]), dtype="float64").reshape(1, shp[0])

-        # We must be sure that the Canonizer is working, but that we don't have other
-        # optimisation that could hide bug in the Canonizer as local_elemwise_fusion
+        # We must be sure that the AlgebraicCanonizer is working, but that we don't have other
+        # optimisations that could hide bugs in the AlgebraicCanonizer, such as local_elemwise_fusion
        mode = get_default_mode()

        opt = Query(["canonicalize"])
@@ -1109,7 +1111,7 @@ class TestCanonize:
        assert f.maker.fgraph.toposort()[0].op == sgn

    @pytest.mark.skip(
-        reason="Current implementation of Canonizer does not "
+        reason="Current implementation of AlgebraicCanonizer does not "
        "implement all cases. Skip the corresponding test."
    )
    def test_multiple_case_that_fail(self):
@@ -1123,8 +1125,8 @@ class TestCanonize:
        dyv = _asarray(np.random.rand(*shp), dtype="float32")
        dzv = _asarray(np.random.rand(*shp), dtype="float32")
        # fvv = _asarray(np.random.rand(shp[0]), dtype='float32').reshape(1, shp[0])
-        # We must be sure that the Canonizer is working, but that we don't have other
-        # optimisation that could hide bug in the Canonizer as local_elemwise_fusion
+        # We must be sure that the AlgebraicCanonizer is working, but that we don't have other
+        # optimisations that could hide bugs in the AlgebraicCanonizer, such as local_elemwise_fusion
        mode = get_default_mode()

        opt = Query(["canonicalize"])

--- a/theano/scan/opt.py
+++ b/theano/scan/opt.py
@@ -86,7 +86,7 @@ from theano.scan.utils import (
    scan_args,
    scan_can_remove_outs,
 )
-from theano.tensor import basic_opt
+from theano.tensor import basic_opt, math_opt
 from theano.tensor.basic import Alloc, AllocEmpty, get_scalar_constant_value
 from theano.tensor.elemwise import DimShuffle, Elemwise
 from theano.tensor.exceptions import NotScalarConstantError
@@ -118,8 +118,8 @@ __copyright__ = "(c) 2010, Universite de Montreal"
 _logger = logging.getLogger("theano.scan.opt")

 list_opt_slice = [
-    basic_opt.local_abs_merge,
-    basic_opt.local_mul_switch_sink,
+    math_opt.local_abs_merge,
+    math_opt.local_mul_switch_sink,
    basic_opt.local_upcast_elemwise_constant_inputs,
    basic_opt.local_useless_switch,
    basic_opt.constant_folding,

--- a/theano/tensor/basic_opt.py
+++ b/theano/tensor/basic_opt.py
--- a/theano/tensor/math_opt.py
+++ b/theano/tensor/math_opt.py
--- a/theano/tensor/nnet/basic.py
+++ b/theano/tensor/nnet/basic.py
@@ -31,7 +31,7 @@ from theano.scalar import UnaryScalarOp

 # Work-around for Python 3.6 issue that prevents `import theano.tensor as tt`
 from theano.tensor import basic as tt
-from theano.tensor import basic_opt, extra_ops
+from theano.tensor import extra_ops, math_opt
 from theano.tensor.basic import ARange, as_tensor_variable
 from theano.tensor.basic_opt import (
    register_canonicalize,
@@ -985,7 +985,7 @@ def softmax_simplifier(numerators, denominators):
    return numerators, denominators


-basic_opt.local_mul_canonizer.add_simplifier(softmax_simplifier, "softmax_simplifier")
+math_opt.local_mul_canonizer.add_simplifier(softmax_simplifier, "softmax_simplifier")


 class CrossentropySoftmaxArgmax1HotWithBias(COp):

--- a/theano/tensor/nnet/sigm.py
+++ b/theano/tensor/nnet/sigm.py
@@ -1085,7 +1085,7 @@ def local_1msigmoid(fgraph, node):

 register_local_1msigmoid = False
 # This is False because the Stabilize pattern above
-# is looking for 1-sigm.  Also Canonizer turns neg into *(-1) and so
+# is looking for 1-sigm.  Also AlgebraicCanonizer turns neg into *(-1) and so
 # this optimization might set off an unwanted chain of things.
 # OTH - this transformation can be seen as pushing normal arithmetic either  below or above the
 # sigmoidal nonlinearity... so if the canonicalized form had anything to say about that then it