Commit 3f457d07, authored by Ricardo Vieira, committed by Ricardo Vieira

Benchmark radon model function

Parent d4eff40d
......@@ -3,6 +3,10 @@ import os
import pytest
# Using pytest_plugins causes `tests/link/c/test_cmodule.py::test_cache_versioning` to fail
# pytest_plugins = ["tests.fixtures"]
def pytest_sessionstart(session):
os.environ["PYTENSOR_FLAGS"] = ",".join(
[
......
......@@ -150,7 +150,7 @@ lines-after-imports = 2
"pytensor/misc/check_duplicate_key.py" = ["T201"]
"pytensor/misc/check_blas.py" = ["T201"]
"pytensor/bin/pytensor_cache.py" = ["T201"]
# For the tests we skip because `pytest.importorskip` is used:
# For the tests we skip `E402` because `pytest.importorskip` is used:
"tests/link/jax/test_scalar.py" = ["E402"]
"tests/link/jax/test_tensor_basic.py" = ["E402"]
"tests/link/numba/test_basic.py" = ["E402"]
......
......@@ -33,6 +33,7 @@ from pytensor.tensor.type import (
scalars,
vector,
)
from tests.fixtures import * # noqa: F403
pytestmark = pytest.mark.filterwarnings("error")
......@@ -1357,3 +1358,67 @@ def test_minimal_random_function_call_benchmark(trust_input, benchmark):
rng_val = np.random.default_rng()
benchmark(f, rng_val)
@pytest.mark.parametrize("mode", ["C", "C_VM"])
def test_radon_model_compile_repeatedly_benchmark(mode, radon_model, benchmark):
    """Benchmark compiling the same radon model from scratch on every round.

    Each round builds a fresh compiled function and evaluates it once, so the
    measurement covers compilation plus a single call.
    """
    joined_inputs, [model_logp, model_dlogp] = radon_model
    test_point = (
        np.random.default_rng(1)
        .normal(size=joined_inputs.type.shape)
        .astype(config.floatX)
    )

    def build_and_evaluate():
        # Recompile from scratch, then call once with the fixed test point.
        compiled = function(
            [joined_inputs], [model_logp, model_dlogp], mode=mode, trust_input=True
        )
        compiled(test_point)

    benchmark.pedantic(build_and_evaluate, rounds=5, iterations=1)
@pytest.mark.parametrize("mode", ["C", "C_VM"])
def test_radon_model_compile_variants_benchmark(
    mode, radon_model, radon_model_variants, benchmark
):
    """Benchmark compilation when a slight variant of the model is compiled each time.

    This more realistically simulates a use case where a model is recompiled
    multiple times with small changes, such as in an interactive environment.

    NOTE: For this test to be meaningful on subsequent runs, the cache must be cleared.
    """
    joined_inputs, [model_logp, model_dlogp] = radon_model
    test_point = (
        np.random.default_rng(1)
        .normal(size=joined_inputs.type.shape)
        .astype(config.floatX)
    )

    # Compile and run the base model once so the cache is warm before timing.
    base_fn = function(
        [joined_inputs], [model_logp, model_dlogp], mode=mode, trust_input=True
    )
    base_fn(test_point)

    def compile_each_variant_once():
        for variant_inputs, [variant_logp, variant_dlogp] in radon_model_variants:
            variant_fn = function(
                [variant_inputs],
                [variant_logp, variant_dlogp],
                mode=mode,
                trust_input=True,
            )
            variant_fn(test_point)

    benchmark.pedantic(compile_each_variant_once, rounds=1, iterations=1)
@pytest.mark.parametrize("mode", ["C", "C_VM", "C_VM_NOGC"])
def test_radon_model_call_benchmark(mode, radon_model, benchmark):
    """Benchmark evaluation (not compilation) of a pre-compiled radon model."""
    joined_inputs, [model_logp, model_dlogp] = radon_model

    # "C_VM_NOGC" is plain C_VM with the VM's garbage collection turned off.
    compiled = function(
        [joined_inputs],
        [model_logp, model_dlogp],
        mode="C_VM" if mode == "C_VM_NOGC" else mode,
        trust_input=True,
    )
    if mode == "C_VM_NOGC":
        compiled.vm.allow_gc = False

    test_point = (
        np.random.default_rng(1)
        .normal(size=joined_inputs.type.shape)
        .astype(config.floatX)
    )
    compiled(test_point)  # warmup
    benchmark(compiled, test_point)
import numpy as np
import pytest
import pytensor.tensor as pt
from pytensor.graph.replace import graph_replace
from pytensor.graph.rewriting import rewrite_graph
from pytensor.graph.traversal import explicit_graph_inputs
def create_radon_model(
    intercept_dist="normal", sigma_dist="halfnormal", centered=False
):
    """Build symbolic logp/dlogp graphs for a hierarchical radon model.

    Parameters
    ----------
    intercept_dist : str
        Key into the internal distribution map ("normal", "lognormal", ...)
        used for the global intercept.
    sigma_dist : str
        Distribution key used for the observation noise scale.
    centered : bool
        If True, county effects are sampled with their scale directly
        (centered parameterization); otherwise raw effects are scaled
        after the fact (non-centered parameterization).

    Returns
    -------
    (joined_inputs, [model_logp, model_dlogp])
        `joined_inputs` is a single flat vector variable holding all free
        parameters; `model_logp` is the scalar joint log-probability and
        `model_dlogp` its flattened gradient, both expressed in terms of
        `joined_inputs`.
    """

    def halfnormal(name, *, sigma=1.0, model_logp):
        # Half-normal variable parameterized on the log scale (unconstrained).
        log_value = pt.scalar(f"{name}_log")
        value = pt.exp(log_value)
        logp = (
            -0.5 * ((value / sigma) ** 2) + pt.log(pt.sqrt(2.0 / np.pi)) - pt.log(sigma)
        )
        logp = pt.switch(value >= 0, logp, -np.inf)
        # NOTE(review): the `+ value` term looks like the exp-transform
        # log-Jacobian, which would be `log_value` (= log(value)), not `value`
        # itself — confirm this is intended. For benchmarking purposes the
        # graph shape is what matters, not the numerical correctness.
        model_logp.append(logp + value)
        return value

    def normal(name, *, mu=0.0, sigma=1.0, model_logp, observed=None):
        # Free scalar if unobserved, otherwise a constant from the data.
        value = pt.scalar(name) if observed is None else pt.as_tensor(observed)
        logp = (
            -0.5 * (((value - mu) / sigma) ** 2)
            - pt.log(pt.sqrt(2.0 * np.pi))
            - pt.log(sigma)
        )
        model_logp.append(logp)
        return value

    def lognormal(name, *, mu=0.0, sigma=1.0, model_logp):
        # Lognormal as exp of a normal; the normal contributes the logp term.
        value = normal(name, mu=mu, sigma=sigma, model_logp=model_logp)
        return pt.exp(value)

    def zerosumnormal(name, *, sigma=1.0, size, model_logp):
        # Zero-sum normal: `size - 1` free values plus one deterministic fill
        # value chosen so the full vector sums to zero.
        raw_value = pt.vector(f"{name}_zerosum", shape=(size - 1,))
        n = raw_value.shape[0] + 1
        sum_vals = raw_value.sum(0, keepdims=True)
        norm = sum_vals / (pt.sqrt(n) + n)
        fill_value = norm - sum_vals / pt.sqrt(n)
        value = pt.concatenate([raw_value, fill_value]) - norm
        shape = value.shape
        _full_size = pt.prod(shape)
        # `shape[-1:].inc(-1)` decrements the last shape entry, i.e. the
        # zero-sum constraint removes one degree of freedom.
        _degrees_of_freedom = pt.prod(shape[-1:].inc(-1))
        logp = pt.sum(
            -0.5 * ((value / sigma) ** 2)
            - (pt.log(pt.sqrt(2.0 * np.pi)) + pt.log(sigma))
            * (_degrees_of_freedom / _full_size)
        )
        model_logp.append(logp)
        return value

    # Dispatch table so intercept/sigma distributions can be selected by name.
    dist_fn_map = {
        fn.__name__: fn for fn in (halfnormal, normal, lognormal, zerosumnormal)
    }

    # Synthetic radon-style dataset: 919 observations across 85 counties.
    rng = np.random.default_rng(1)
    n_counties = 85
    county_idx = rng.integers(n_counties, size=919)
    county_idx.sort()
    floor = rng.binomial(n=1, p=0.5, size=919).astype(np.float64)
    log_radon = rng.normal(size=919)

    # Each distribution helper appends its logp contribution to this list.
    model_logp = []
    intercept = dist_fn_map[intercept_dist](
        "intercept", sigma=10, model_logp=model_logp
    )

    # County effects
    county_sd = halfnormal("county_sd", model_logp=model_logp)
    if centered:
        county_effect = zerosumnormal(
            "county_raw", sigma=county_sd, size=n_counties, model_logp=model_logp
        )
    else:
        county_raw = zerosumnormal("county_raw", size=n_counties, model_logp=model_logp)
        county_effect = county_raw * county_sd

    # Global floor effect
    floor_effect = normal("floor_effect", sigma=2, model_logp=model_logp)

    # County-level floor effects (same centered/non-centered choice as above).
    county_floor_sd = halfnormal("county_floor_sd", model_logp=model_logp)
    if centered:
        county_floor_effect = zerosumnormal(
            "county_floor_raw",
            sigma=county_floor_sd,
            size=n_counties,
            model_logp=model_logp,
        )
    else:
        county_floor_raw = zerosumnormal(
            "county_floor_raw", size=n_counties, model_logp=model_logp
        )
        county_floor_effect = county_floor_raw * county_floor_sd

    # Linear predictor for the observed log-radon measurements.
    mu = (
        intercept
        + county_effect[county_idx]
        + floor_effect * floor
        + county_floor_effect[county_idx] * floor
    )

    sigma = dist_fn_map[sigma_dist]("sigma", model_logp=model_logp)
    # Likelihood term; only its logp contribution matters, not the value.
    _ = normal(
        "log_radon",
        mu=mu,
        sigma=sigma,
        observed=log_radon,
        model_logp=model_logp,
    )

    # Joint logp, lightly rewritten for numerical stability before taking grads.
    model_logp = pt.sum([logp.sum() for logp in model_logp])
    model_logp = rewrite_graph(
        model_logp, include=("canonicalize", "stabilize"), clone=False
    )

    params = list(explicit_graph_inputs(model_logp))
    model_dlogp = pt.concatenate([term.ravel() for term in pt.grad(model_logp, params)])

    # Replace the individual parameter variables with slices of one flat
    # "joined_inputs" vector, the form samplers typically consume.
    size = sum(int(np.prod(p.type.shape)) for p in params)
    joined_inputs = pt.vector("joined_inputs", shape=(size,))
    idx = 0
    replacement = {}
    for param in params:
        param_shape = param.type.shape
        param_size = int(np.prod(param_shape))
        replacement[param] = joined_inputs[idx : idx + param_size].reshape(param_shape)
        idx += param_size
    assert idx == joined_inputs.type.shape[0]

    model_logp, model_dlogp = graph_replace([model_logp, model_dlogp], replacement)
    return joined_inputs, [model_logp, model_dlogp]
@pytest.fixture(scope="session")
def radon_model():
    """Session-scoped default radon model: (joined_inputs, [logp, dlogp])."""
    model = create_radon_model()
    return model
@pytest.fixture(scope="session")
def radon_model_variants():
    """Session-scoped list of radon models over all distribution/centering combos."""
    variants = []
    for centered in (True, False):
        for intercept_dist in ("normal", "lognormal"):
            for sigma_dist in ("halfnormal", "lognormal"):
                variants.append(
                    create_radon_model(
                        intercept_dist=intercept_dist,
                        sigma_dist=sigma_dist,
                        centered=centered,
                    )
                )
    return variants
......@@ -13,6 +13,7 @@ from pytensor.compile.mode import Mode
from pytensor.graph.rewriting.db import RewriteDatabaseQuery
from pytensor.link.numba.linker import NumbaLinker
from pytensor.tensor.math import Max
from tests.fixtures import * # noqa: F403
opts = RewriteDatabaseQuery(include=[None], exclude=["cxx_only", "BlasOpt"])
......@@ -75,3 +76,72 @@ def test_careduce_performance(careduce_fn, numpy_fn, axis, inputs, input_vals):
# FIXME: Why are we asserting >=? Numba could be doing worse than numpy!
assert mean_numba_time / mean_numpy_time >= 0.75
@pytest.mark.parametrize("cache", (False, True))
def test_radon_model_compile_repeatedly_numba_benchmark(cache, radon_model, benchmark):
    """Benchmark repeated NUMBA compilation (plus one call) of the radon model,
    with the numba cache both disabled and enabled."""
    joined_inputs, [model_logp, model_dlogp] = radon_model
    rng = np.random.default_rng(1)
    x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)

    def compile_and_call_once():
        # Compile from scratch under the requested cache setting, then call once.
        with config.change_flags(numba__cache=cache):
            fn = function(
                [joined_inputs],
                [model_logp, model_dlogp],
                mode="NUMBA",
                trust_input=True,
            )
            fn(x)

    benchmark.pedantic(compile_and_call_once, rounds=5, iterations=1)
@pytest.mark.parametrize("cache", (False, True))
def test_radon_model_compile_variants_numba_benchmark(
    cache, radon_model, radon_model_variants, benchmark
):
    """Test compilation speed when a slight variant of a function is compiled each time.

    This test more realistically simulates a use case where a model is recompiled
    multiple times with small changes, such as in an interactive environment.

    NOTE: For this test to be meaningful on subsequent runs, the cache must be cleared
    """
    joined_inputs, [model_logp, model_dlogp] = radon_model
    rng = np.random.default_rng(1)
    x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)

    # Compile base function once to populate the cache
    fn = function(
        [joined_inputs], [model_logp, model_dlogp], mode="NUMBA", trust_input=True
    )
    fn(x)

    def compile_and_call_once():
        # Each variant is compiled fresh under the requested numba cache setting.
        with config.change_flags(numba__cache=cache):
            for joined_inputs, [model_logp, model_dlogp] in radon_model_variants:
                fn = function(
                    [joined_inputs],
                    [model_logp, model_dlogp],
                    mode="NUMBA",
                    trust_input=True,
                )
                fn(x)

    benchmark.pedantic(compile_and_call_once, rounds=1, iterations=1)
@pytest.mark.parametrize("cache", (False, True))
def test_radon_model_call_numba_benchmark(cache, radon_model, benchmark):
    """Benchmark evaluation of the NUMBA-compiled radon model (compilation
    itself happens once, outside the timed calls)."""
    joined_inputs, [model_logp, model_dlogp] = radon_model
    # The cache flag only needs to be active while the function is compiled.
    with config.change_flags(numba__cache=cache):
        fn = function(
            [joined_inputs], [model_logp, model_dlogp], mode="NUMBA", trust_input=True
        )
    rng = np.random.default_rng(1)
    x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
    fn(x)  # warmup
    benchmark.pedantic(fn, (x,), rounds=10_000, iterations=10)
Markdown formatting supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment