Commit 3f457d07, authored by Ricardo Vieira, committed by Ricardo Vieira

Benchmark radon model function

Parent d4eff40d
......@@ -3,6 +3,10 @@ import os
import pytest
# Using pytest_plugins causes `tests/link/c/test_cmodule.py::test_cache_versioning` to fail
# pytest_plugins = ["tests.fixtures"]
def pytest_sessionstart(session):
os.environ["PYTENSOR_FLAGS"] = ",".join(
[
......
......@@ -150,7 +150,7 @@ lines-after-imports = 2
"pytensor/misc/check_duplicate_key.py" = ["T201"]
"pytensor/misc/check_blas.py" = ["T201"]
"pytensor/bin/pytensor_cache.py" = ["T201"]
# For the tests we skip because `pytest.importorskip` is used:
# For the tests we skip `E402` because `pytest.importorskip` is used:
"tests/link/jax/test_scalar.py" = ["E402"]
"tests/link/jax/test_tensor_basic.py" = ["E402"]
"tests/link/numba/test_basic.py" = ["E402"]
......
......@@ -33,6 +33,7 @@ from pytensor.tensor.type import (
scalars,
vector,
)
from tests.fixtures import * # noqa: F403
pytestmark = pytest.mark.filterwarnings("error")
......@@ -1357,3 +1358,67 @@ def test_minimal_random_function_call_benchmark(trust_input, benchmark):
rng_val = np.random.default_rng()
benchmark(f, rng_val)
@pytest.mark.parametrize("mode", ["C", "C_VM"])
def test_radon_model_compile_repeatedly_benchmark(mode, radon_model, benchmark):
    """Benchmark compiling the same radon model from scratch on every round.

    Each round builds a fresh compiled function and evaluates it once, so the
    measurement covers compilation plus a single call.
    """
    joined_inputs, [model_logp, model_dlogp] = radon_model
    test_point = (
        np.random.default_rng(1)
        .normal(size=joined_inputs.type.shape)
        .astype(config.floatX)
    )

    def build_and_evaluate():
        # Recompile from scratch, then call once with the fixed test point.
        compiled = function(
            [joined_inputs], [model_logp, model_dlogp], mode=mode, trust_input=True
        )
        compiled(test_point)

    benchmark.pedantic(build_and_evaluate, rounds=5, iterations=1)
@pytest.mark.parametrize("mode", ["C", "C_VM"])
def test_radon_model_compile_variants_benchmark(
    mode, radon_model, radon_model_variants, benchmark
):
    """Benchmark compilation when a slight variant of the model is compiled each time.

    This more realistically simulates a use case where a model is recompiled
    multiple times with small changes, such as in an interactive environment.

    NOTE: For this test to be meaningful on subsequent runs, the cache must be cleared.
    """
    joined_inputs, [model_logp, model_dlogp] = radon_model
    test_point = (
        np.random.default_rng(1)
        .normal(size=joined_inputs.type.shape)
        .astype(config.floatX)
    )

    # Compile and run the base model once so the cache is warm before timing.
    base_fn = function(
        [joined_inputs], [model_logp, model_dlogp], mode=mode, trust_input=True
    )
    base_fn(test_point)

    def compile_each_variant_once():
        for variant_inputs, [variant_logp, variant_dlogp] in radon_model_variants:
            variant_fn = function(
                [variant_inputs],
                [variant_logp, variant_dlogp],
                mode=mode,
                trust_input=True,
            )
            variant_fn(test_point)

    benchmark.pedantic(compile_each_variant_once, rounds=1, iterations=1)
@pytest.mark.parametrize("mode", ["C", "C_VM", "C_VM_NOGC"])
def test_radon_model_call_benchmark(mode, radon_model, benchmark):
    """Benchmark evaluation (not compilation) of a pre-compiled radon model."""
    joined_inputs, [model_logp, model_dlogp] = radon_model

    # "C_VM_NOGC" is plain C_VM with the VM's garbage collection turned off.
    compiled = function(
        [joined_inputs],
        [model_logp, model_dlogp],
        mode="C_VM" if mode == "C_VM_NOGC" else mode,
        trust_input=True,
    )
    if mode == "C_VM_NOGC":
        compiled.vm.allow_gc = False

    test_point = (
        np.random.default_rng(1)
        .normal(size=joined_inputs.type.shape)
        .astype(config.floatX)
    )
    compiled(test_point)  # warmup
    benchmark(compiled, test_point)
import numpy as np
import pytest
import pytensor.tensor as pt
from pytensor.graph.replace import graph_replace
from pytensor.graph.rewriting import rewrite_graph
from pytensor.graph.traversal import explicit_graph_inputs
def create_radon_model(
    intercept_dist="normal", sigma_dist="halfnormal", centered=False
):
    """Build symbolic logp/dlogp graphs for a hierarchical radon model.

    Parameters
    ----------
    intercept_dist : str
        Key into the internal distribution map ("normal", "lognormal", ...)
        used for the global intercept.
    sigma_dist : str
        Distribution key used for the observation noise scale.
    centered : bool
        If True, county effects are sampled with their scale directly
        (centered parameterization); otherwise raw effects are scaled
        after the fact (non-centered parameterization).

    Returns
    -------
    (joined_inputs, [model_logp, model_dlogp])
        `joined_inputs` is a single flat vector variable holding all free
        parameters; `model_logp` is the scalar joint log-probability and
        `model_dlogp` its flattened gradient, both expressed in terms of
        `joined_inputs`.
    """

    def halfnormal(name, *, sigma=1.0, model_logp):
        # Half-normal variable parameterized on the log scale (unconstrained).
        log_value = pt.scalar(f"{name}_log")
        value = pt.exp(log_value)
        logp = (
            -0.5 * ((value / sigma) ** 2) + pt.log(pt.sqrt(2.0 / np.pi)) - pt.log(sigma)
        )
        logp = pt.switch(value >= 0, logp, -np.inf)
        # NOTE(review): the `+ value` term looks like the exp-transform
        # log-Jacobian, which would be `log_value` (= log(value)), not `value`
        # itself — confirm this is intended. For benchmarking purposes the
        # graph shape is what matters, not the numerical correctness.
        model_logp.append(logp + value)
        return value

    def normal(name, *, mu=0.0, sigma=1.0, model_logp, observed=None):
        # Free scalar if unobserved, otherwise a constant from the data.
        value = pt.scalar(name) if observed is None else pt.as_tensor(observed)
        logp = (
            -0.5 * (((value - mu) / sigma) ** 2)
            - pt.log(pt.sqrt(2.0 * np.pi))
            - pt.log(sigma)
        )
        model_logp.append(logp)
        return value

    def lognormal(name, *, mu=0.0, sigma=1.0, model_logp):
        # Lognormal as exp of a normal; the normal contributes the logp term.
        value = normal(name, mu=mu, sigma=sigma, model_logp=model_logp)
        return pt.exp(value)

    def zerosumnormal(name, *, sigma=1.0, size, model_logp):
        # Zero-sum normal: `size - 1` free values plus one deterministic fill
        # value chosen so the full vector sums to zero.
        raw_value = pt.vector(f"{name}_zerosum", shape=(size - 1,))
        n = raw_value.shape[0] + 1
        sum_vals = raw_value.sum(0, keepdims=True)
        norm = sum_vals / (pt.sqrt(n) + n)
        fill_value = norm - sum_vals / pt.sqrt(n)
        value = pt.concatenate([raw_value, fill_value]) - norm
        shape = value.shape
        _full_size = pt.prod(shape)
        # `shape[-1:].inc(-1)` decrements the last shape entry, i.e. the
        # zero-sum constraint removes one degree of freedom.
        _degrees_of_freedom = pt.prod(shape[-1:].inc(-1))
        logp = pt.sum(
            -0.5 * ((value / sigma) ** 2)
            - (pt.log(pt.sqrt(2.0 * np.pi)) + pt.log(sigma))
            * (_degrees_of_freedom / _full_size)
        )
        model_logp.append(logp)
        return value

    # Dispatch table so intercept/sigma distributions can be selected by name.
    dist_fn_map = {
        fn.__name__: fn for fn in (halfnormal, normal, lognormal, zerosumnormal)
    }

    # Synthetic radon-style dataset: 919 observations across 85 counties.
    rng = np.random.default_rng(1)
    n_counties = 85
    county_idx = rng.integers(n_counties, size=919)
    county_idx.sort()
    floor = rng.binomial(n=1, p=0.5, size=919).astype(np.float64)
    log_radon = rng.normal(size=919)

    # Each distribution helper appends its logp contribution to this list.
    model_logp = []
    intercept = dist_fn_map[intercept_dist](
        "intercept", sigma=10, model_logp=model_logp
    )

    # County effects
    county_sd = halfnormal("county_sd", model_logp=model_logp)
    if centered:
        county_effect = zerosumnormal(
            "county_raw", sigma=county_sd, size=n_counties, model_logp=model_logp
        )
    else:
        county_raw = zerosumnormal("county_raw", size=n_counties, model_logp=model_logp)
        county_effect = county_raw * county_sd

    # Global floor effect
    floor_effect = normal("floor_effect", sigma=2, model_logp=model_logp)

    # County-level floor effects (same centered/non-centered choice as above).
    county_floor_sd = halfnormal("county_floor_sd", model_logp=model_logp)
    if centered:
        county_floor_effect = zerosumnormal(
            "county_floor_raw",
            sigma=county_floor_sd,
            size=n_counties,
            model_logp=model_logp,
        )
    else:
        county_floor_raw = zerosumnormal(
            "county_floor_raw", size=n_counties, model_logp=model_logp
        )
        county_floor_effect = county_floor_raw * county_floor_sd

    # Linear predictor for the observed log-radon measurements.
    mu = (
        intercept
        + county_effect[county_idx]
        + floor_effect * floor
        + county_floor_effect[county_idx] * floor
    )

    sigma = dist_fn_map[sigma_dist]("sigma", model_logp=model_logp)
    # Likelihood term; only its logp contribution matters, not the value.
    _ = normal(
        "log_radon",
        mu=mu,
        sigma=sigma,
        observed=log_radon,
        model_logp=model_logp,
    )

    # Joint logp, lightly rewritten for numerical stability before taking grads.
    model_logp = pt.sum([logp.sum() for logp in model_logp])
    model_logp = rewrite_graph(
        model_logp, include=("canonicalize", "stabilize"), clone=False
    )

    params = list(explicit_graph_inputs(model_logp))
    model_dlogp = pt.concatenate([term.ravel() for term in pt.grad(model_logp, params)])

    # Replace the individual parameter variables with slices of one flat
    # "joined_inputs" vector, the form samplers typically consume.
    size = sum(int(np.prod(p.type.shape)) for p in params)
    joined_inputs = pt.vector("joined_inputs", shape=(size,))
    idx = 0
    replacement = {}
    for param in params:
        param_shape = param.type.shape
        param_size = int(np.prod(param_shape))
        replacement[param] = joined_inputs[idx : idx + param_size].reshape(param_shape)
        idx += param_size
    assert idx == joined_inputs.type.shape[0]

    model_logp, model_dlogp = graph_replace([model_logp, model_dlogp], replacement)
    return joined_inputs, [model_logp, model_dlogp]
@pytest.fixture(scope="session")
def radon_model():
    """Session-scoped default radon model: (joined_inputs, [logp, dlogp])."""
    model = create_radon_model()
    return model
@pytest.fixture(scope="session")
def radon_model_variants():
    """Session-scoped list of radon models over all distribution/centering combos."""
    variants = []
    for centered in (True, False):
        for intercept_dist in ("normal", "lognormal"):
            for sigma_dist in ("halfnormal", "lognormal"):
                variants.append(
                    create_radon_model(
                        intercept_dist=intercept_dist,
                        sigma_dist=sigma_dist,
                        centered=centered,
                    )
                )
    return variants
......@@ -13,6 +13,7 @@ from pytensor.compile.mode import Mode
from pytensor.graph.rewriting.db import RewriteDatabaseQuery
from pytensor.link.numba.linker import NumbaLinker
from pytensor.tensor.math import Max
from tests.fixtures import * # noqa: F403
opts = RewriteDatabaseQuery(include=[None], exclude=["cxx_only", "BlasOpt"])
......@@ -75,3 +76,72 @@ def test_careduce_performance(careduce_fn, numpy_fn, axis, inputs, input_vals):
# FIXME: Why are we asserting >=? Numba could be doing worse than numpy!
assert mean_numba_time / mean_numpy_time >= 0.75
@pytest.mark.parametrize("cache", (False, True))
def test_radon_model_compile_repeatedly_numba_benchmark(cache, radon_model, benchmark):
    """Benchmark repeated NUMBA compilation (plus one call) of the radon model,
    with the numba cache both disabled and enabled."""
    joined_inputs, [model_logp, model_dlogp] = radon_model
    rng = np.random.default_rng(1)
    x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)

    def compile_and_call_once():
        # Compile from scratch under the requested cache setting, then call once.
        with config.change_flags(numba__cache=cache):
            fn = function(
                [joined_inputs],
                [model_logp, model_dlogp],
                mode="NUMBA",
                trust_input=True,
            )
            fn(x)

    benchmark.pedantic(compile_and_call_once, rounds=5, iterations=1)
@pytest.mark.parametrize("cache", (False, True))
def test_radon_model_compile_variants_numba_benchmark(
    cache, radon_model, radon_model_variants, benchmark
):
    """Test compilation speed when a slight variant of a function is compiled each time.

    This test more realistically simulates a use case where a model is recompiled
    multiple times with small changes, such as in an interactive environment.

    NOTE: For this test to be meaningful on subsequent runs, the cache must be cleared
    """
    joined_inputs, [model_logp, model_dlogp] = radon_model
    rng = np.random.default_rng(1)
    x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)

    # Compile base function once to populate the cache
    fn = function(
        [joined_inputs], [model_logp, model_dlogp], mode="NUMBA", trust_input=True
    )
    fn(x)

    def compile_and_call_once():
        # Each variant is compiled fresh under the requested numba cache setting.
        with config.change_flags(numba__cache=cache):
            for joined_inputs, [model_logp, model_dlogp] in radon_model_variants:
                fn = function(
                    [joined_inputs],
                    [model_logp, model_dlogp],
                    mode="NUMBA",
                    trust_input=True,
                )
                fn(x)

    benchmark.pedantic(compile_and_call_once, rounds=1, iterations=1)
@pytest.mark.parametrize("cache", (False, True))
def test_radon_model_call_numba_benchmark(cache, radon_model, benchmark):
    """Benchmark evaluation of the NUMBA-compiled radon model (compilation
    itself happens once, outside the timed calls)."""
    joined_inputs, [model_logp, model_dlogp] = radon_model
    # The cache flag only needs to be active while the function is compiled.
    with config.change_flags(numba__cache=cache):
        fn = function(
            [joined_inputs], [model_logp, model_dlogp], mode="NUMBA", trust_input=True
        )
    rng = np.random.default_rng(1)
    x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
    fn(x)  # warmup
    benchmark.pedantic(fn, (x,), rounds=10_000, iterations=10)
Markdown formatting supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment