Safeguard local_log_sum_exp optimization against -inf values and make it non-symmetric

Fixes #461

Safeguard local_log_sum_exp optimization against -inf values and make it non-symmetric
08f49d16 · Ricardo · Ricardo Vieira · 5b85bca4 · 08f49d16 · 08f49d16
--- a/aesara/tensor/math_opt.py
+++ b/aesara/tensor/math_opt.py
@@ -11,7 +11,6 @@ from functools import reduce
 import numpy as np
 import aesara.scalar.basic as aes
-from aesara import compile
 from aesara.assert_op import assert_op
 from aesara.configdefaults import config
 from aesara.graph.basic import Constant, Variable
@@ -2312,6 +2311,8 @@ def local_log_add_exp(fgraph, node):
                return [ret]
+@register_stabilize
+@register_specialize
 @local_optimizer([log])
 def local_log_sum_exp(fgraph, node):
    # log(sum_i(exp(x_i))) = x_max + log(sum_i(exp(x_i - x_max)))
@@ -2340,7 +2341,19 @@ def local_log_sum_exp(fgraph, node):
    max_pre_exp = aet_max(pre_exp, axis=axis)
    max_pre_exp_keepdims = makeKeepDims(pre_exp, max_pre_exp, axis)
-    ret = max_pre_exp + log(aet_sum(exp(pre_exp - max_pre_exp_keepdims), axis=axis))
+    # Do not offset when max_pre_exp = +-np.inf (inf - inf is nan), to avoid nan in the output
+    # Switch statement is placed directly inside sum to break the self-symmetry
+    # of the returned output (otherwise the optimization would not stabilize)
+    ret = max_pre_exp + log(
+        aet_sum(
+            switch(
+                isinf(max_pre_exp_keepdims),
+                exp(max_pre_exp_keepdims),
+                exp(pre_exp - max_pre_exp_keepdims),
+            ),
+            axis=axis,
+        ),
+    )
    # Restore the dimshuffle op, if any.
    if dimshuffle_op:
@@ -2349,14 +2362,6 @@ def local_log_sum_exp(fgraph, node):
    return [ret]
-compile.optdb.register(
-    "local_log_sum_exp",
-    in2out(local_log_sum_exp, ignore_newtrees=True),
-    1.6,
-    "fast_run",
-)
 def add_calculate(num, denum, aslist=False, out_type=None):
    # TODO: make sure that this function and mul_calculate are similar
    if out_type is None:

--- a/tests/tensor/test_math_opt.py
+++ b/tests/tensor/test_math_opt.py
@@ -4000,6 +4000,16 @@ def test_local_log_sum_exp3():
    assert np.allclose(optimised_ret, 100.0)
+def test_local_log_sum_exp_inf():
+    # Test that when max = +-inf, optimized output still works correctly
+    x = vector("x")
+    f = compile_graph_log_sum_exp(x, axis=0)
+    assert f([-np.inf, -np.inf]) == -np.inf
+    assert f([np.inf, np.inf]) == np.inf
+    assert f([-np.inf, np.inf]) == np.inf
 def test_local_reciprocal_1_plus_exp():
    x = vector("x")
    y = aet.reciprocal(1 + exp(x))