提交 3f457d07 authored 作者: Ricardo Vieira's avatar Ricardo Vieira 提交者: Ricardo Vieira

Benchmark radon model function

上级 d4eff40d
...@@ -3,6 +3,10 @@ import os ...@@ -3,6 +3,10 @@ import os
import pytest import pytest
# Using pytest_plugins causes `tests/link/c/test_cmodule.py::test_cache_versioning` to fail
# pytest_plugins = ["tests.fixtures"]
def pytest_sessionstart(session): def pytest_sessionstart(session):
os.environ["PYTENSOR_FLAGS"] = ",".join( os.environ["PYTENSOR_FLAGS"] = ",".join(
[ [
......
...@@ -150,7 +150,7 @@ lines-after-imports = 2 ...@@ -150,7 +150,7 @@ lines-after-imports = 2
"pytensor/misc/check_duplicate_key.py" = ["T201"] "pytensor/misc/check_duplicate_key.py" = ["T201"]
"pytensor/misc/check_blas.py" = ["T201"] "pytensor/misc/check_blas.py" = ["T201"]
"pytensor/bin/pytensor_cache.py" = ["T201"] "pytensor/bin/pytensor_cache.py" = ["T201"]
# For the tests we skip because `pytest.importorskip` is used: # For the tests we skip `E402` because `pytest.importorskip` is used:
"tests/link/jax/test_scalar.py" = ["E402"] "tests/link/jax/test_scalar.py" = ["E402"]
"tests/link/jax/test_tensor_basic.py" = ["E402"] "tests/link/jax/test_tensor_basic.py" = ["E402"]
"tests/link/numba/test_basic.py" = ["E402"] "tests/link/numba/test_basic.py" = ["E402"]
......
...@@ -33,6 +33,7 @@ from pytensor.tensor.type import ( ...@@ -33,6 +33,7 @@ from pytensor.tensor.type import (
scalars, scalars,
vector, vector,
) )
from tests.fixtures import * # noqa: F403
pytestmark = pytest.mark.filterwarnings("error") pytestmark = pytest.mark.filterwarnings("error")
...@@ -1357,3 +1358,67 @@ def test_minimal_random_function_call_benchmark(trust_input, benchmark): ...@@ -1357,3 +1358,67 @@ def test_minimal_random_function_call_benchmark(trust_input, benchmark):
rng_val = np.random.default_rng() rng_val = np.random.default_rng()
benchmark(f, rng_val) benchmark(f, rng_val)
@pytest.mark.parametrize("mode", ["C", "C_VM"])
def test_radon_model_compile_repeatedly_benchmark(mode, radon_model, benchmark):
joined_inputs, [model_logp, model_dlogp] = radon_model
rng = np.random.default_rng(1)
x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
def compile_and_call_once():
fn = function(
[joined_inputs], [model_logp, model_dlogp], mode=mode, trust_input=True
)
fn(x)
benchmark.pedantic(compile_and_call_once, rounds=5, iterations=1)
@pytest.mark.parametrize("mode", ["C", "C_VM"])
def test_radon_model_compile_variants_benchmark(
mode, radon_model, radon_model_variants, benchmark
):
"""Test compilation speed when a slightly variant of a function is compiled each time.
This test more realistically simulates a use case where a model is recompiled
multiple times with small changes, such as in an interactive environment.
NOTE: For this test to be meaningful on subsequent runs, the cache must be cleared
"""
joined_inputs, [model_logp, model_dlogp] = radon_model
rng = np.random.default_rng(1)
x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
# Compile base function once to populate the cache
fn = function(
[joined_inputs], [model_logp, model_dlogp], mode=mode, trust_input=True
)
fn(x)
def compile_and_call_once():
for joined_inputs, [model_logp, model_dlogp] in radon_model_variants:
fn = function(
[joined_inputs], [model_logp, model_dlogp], mode=mode, trust_input=True
)
fn(x)
benchmark.pedantic(compile_and_call_once, rounds=1, iterations=1)
@pytest.mark.parametrize("mode", ["C", "C_VM", "C_VM_NOGC"])
def test_radon_model_call_benchmark(mode, radon_model, benchmark):
joined_inputs, [model_logp, model_dlogp] = radon_model
real_mode = "C_VM" if mode == "C_VM_NOGC" else mode
fn = function(
[joined_inputs], [model_logp, model_dlogp], mode=real_mode, trust_input=True
)
if mode == "C_VM_NOGC":
fn.vm.allow_gc = False
rng = np.random.default_rng(1)
x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
fn(x) # warmup
benchmark(fn, x)
import numpy as np
import pytest
import pytensor.tensor as pt
from pytensor.graph.replace import graph_replace
from pytensor.graph.rewriting import rewrite_graph
from pytensor.graph.traversal import explicit_graph_inputs
def create_radon_model(
    intercept_dist="normal", sigma_dist="halfnormal", centered=False
):
    """Build the hierarchical "radon" regression model as PyTensor graphs.

    Constructs the joint log-probability of a varying-intercept/varying-slope
    radon model (85 counties, 919 synthetic observations) and its gradient,
    both expressed as functions of a single flattened parameter vector.

    Parameters
    ----------
    intercept_dist : str
        Name of the distribution used for the global intercept
        (a key of the internal ``dist_fn_map``, e.g. "normal" or "lognormal").
    sigma_dist : str
        Name of the distribution used for the observation noise scale.
    centered : bool
        Whether county effects use the centered parametrization
        (scale inside the zero-sum normal) or the non-centered one
        (standardized zero-sum normal multiplied by the scale).

    Returns
    -------
    tuple
        ``(joined_inputs, [model_logp, model_dlogp])`` where ``joined_inputs``
        is a flat vector variable holding all free parameters, and the two
        outputs are scalar logp and its concatenated, raveled gradient.
    """

    def halfnormal(name, *, sigma=1.0, model_logp):
        # Parameter is sampled on the log scale; value = exp(log_value) > 0.
        log_value = pt.scalar(f"{name}_log")
        value = pt.exp(log_value)
        # Half-normal logpdf: log(sqrt(2/pi)) - log(sigma) - value^2 / (2 sigma^2)
        logp = (
            -0.5 * ((value / sigma) ** 2) + pt.log(pt.sqrt(2.0 / np.pi)) - pt.log(sigma)
        )
        logp = pt.switch(value >= 0, logp, -np.inf)
        # NOTE(review): the term added for the exp transform is `value`
        # (= exp(log_value)); the usual log-Jacobian of exp is `log_value`.
        # Harmless for benchmarking purposes, but confirm if correctness matters.
        model_logp.append(logp + value)
        return value

    def normal(name, *, mu=0.0, sigma=1.0, model_logp, observed=None):
        # Free scalar parameter if unobserved, otherwise a constant data tensor.
        value = pt.scalar(name) if observed is None else pt.as_tensor(observed)
        # Normal logpdf: -((value - mu)/sigma)^2 / 2 - log(sqrt(2 pi)) - log(sigma)
        logp = (
            -0.5 * (((value - mu) / sigma) ** 2)
            - pt.log(pt.sqrt(2.0 * np.pi))
            - pt.log(sigma)
        )
        model_logp.append(logp)
        return value

    def lognormal(name, *, mu=0.0, sigma=1.0, model_logp):
        # Defined as exp of a normal variable; logp of the underlying normal
        # is registered by the nested call.
        value = normal(name, mu=mu, sigma=sigma, model_logp=model_logp)
        return pt.exp(value)

    def zerosumnormal(name, *, sigma=1.0, size, model_logp):
        # Zero-sum normal parametrized by `size - 1` free values; the last
        # entry is reconstructed so the full vector sums to zero.
        raw_value = pt.vector(f"{name}_zerosum", shape=(size - 1,))
        n = raw_value.shape[0] + 1
        sum_vals = raw_value.sum(0, keepdims=True)
        norm = sum_vals / (pt.sqrt(n) + n)
        fill_value = norm - sum_vals / pt.sqrt(n)
        value = pt.concatenate([raw_value, fill_value]) - norm
        shape = value.shape
        _full_size = pt.prod(shape)
        # Degrees of freedom: product of the shape with the last dim reduced
        # by one (for a vector of length n this is n - 1).
        _degrees_of_freedom = pt.prod(shape[-1:].inc(-1))
        # Normal logp with the normalization constant scaled by the fraction
        # of the entries that are actually free (dof / full size).
        logp = pt.sum(
            -0.5 * ((value / sigma) ** 2)
            - (pt.log(pt.sqrt(2.0 * np.pi)) + pt.log(sigma))
            * (_degrees_of_freedom / _full_size)
        )
        model_logp.append(logp)
        return value

    # Dispatch table keyed by distribution name, used for the configurable
    # intercept and sigma distributions.
    dist_fn_map = {
        fn.__name__: fn for fn in (halfnormal, normal, lognormal, zerosumnormal)
    }

    # Fixed-seed synthetic dataset standing in for the radon survey data.
    rng = np.random.default_rng(1)
    n_counties = 85
    county_idx = rng.integers(n_counties, size=919)
    county_idx.sort()
    floor = rng.binomial(n=1, p=0.5, size=919).astype(np.float64)
    log_radon = rng.normal(size=919)

    # Accumulator for each variable's logp contribution.
    model_logp = []

    intercept = dist_fn_map[intercept_dist](
        "intercept", sigma=10, model_logp=model_logp
    )

    # County effects
    county_sd = halfnormal("county_sd", model_logp=model_logp)
    if centered:
        county_effect = zerosumnormal(
            "county_raw", sigma=county_sd, size=n_counties, model_logp=model_logp
        )
    else:
        county_raw = zerosumnormal("county_raw", size=n_counties, model_logp=model_logp)
        county_effect = county_raw * county_sd

    # Global floor effect
    floor_effect = normal("floor_effect", sigma=2, model_logp=model_logp)

    # Per-county floor effect (again centered or non-centered).
    county_floor_sd = halfnormal("county_floor_sd", model_logp=model_logp)
    if centered:
        county_floor_effect = zerosumnormal(
            "county_floor_raw",
            sigma=county_floor_sd,
            size=n_counties,
            model_logp=model_logp,
        )
    else:
        county_floor_raw = zerosumnormal(
            "county_floor_raw", size=n_counties, model_logp=model_logp
        )
        county_floor_effect = county_floor_raw * county_floor_sd

    # Linear predictor for each observation.
    mu = (
        intercept
        + county_effect[county_idx]
        + floor_effect * floor
        + county_floor_effect[county_idx] * floor
    )

    # Observation noise and likelihood of the observed log-radon values.
    sigma = dist_fn_map[sigma_dist]("sigma", model_logp=model_logp)
    _ = normal(
        "log_radon",
        mu=mu,
        sigma=sigma,
        observed=log_radon,
        model_logp=model_logp,
    )

    # Total joint logp, lightly rewritten for numerical stability before
    # taking gradients.
    model_logp = pt.sum([logp.sum() for logp in model_logp])
    model_logp = rewrite_graph(
        model_logp, include=("canonicalize", "stabilize"), clone=False
    )
    params = list(explicit_graph_inputs(model_logp))
    model_dlogp = pt.concatenate([term.ravel() for term in pt.grad(model_logp, params)])

    # Replace the individual parameter variables with slices of one flat
    # input vector so the compiled function takes a single array argument.
    size = sum(int(np.prod(p.type.shape)) for p in params)
    joined_inputs = pt.vector("joined_inputs", shape=(size,))
    idx = 0
    replacement = {}
    for param in params:
        param_shape = param.type.shape
        param_size = int(np.prod(param_shape))
        replacement[param] = joined_inputs[idx : idx + param_size].reshape(param_shape)
        idx += param_size
    assert idx == joined_inputs.type.shape[0]
    model_logp, model_dlogp = graph_replace([model_logp, model_dlogp], replacement)

    return joined_inputs, [model_logp, model_dlogp]
@pytest.fixture(scope="session")
def radon_model():
return create_radon_model()
@pytest.fixture(scope="session")
def radon_model_variants():
# Convert to list comp
return [
create_radon_model(
intercept_dist=intercept_dist,
sigma_dist=sigma_dist,
centered=centered,
)
for centered in (True, False)
for intercept_dist in ("normal", "lognormal")
for sigma_dist in ("halfnormal", "lognormal")
]
...@@ -13,6 +13,7 @@ from pytensor.compile.mode import Mode ...@@ -13,6 +13,7 @@ from pytensor.compile.mode import Mode
from pytensor.graph.rewriting.db import RewriteDatabaseQuery from pytensor.graph.rewriting.db import RewriteDatabaseQuery
from pytensor.link.numba.linker import NumbaLinker from pytensor.link.numba.linker import NumbaLinker
from pytensor.tensor.math import Max from pytensor.tensor.math import Max
from tests.fixtures import * # noqa: F403
opts = RewriteDatabaseQuery(include=[None], exclude=["cxx_only", "BlasOpt"]) opts = RewriteDatabaseQuery(include=[None], exclude=["cxx_only", "BlasOpt"])
...@@ -75,3 +76,72 @@ def test_careduce_performance(careduce_fn, numpy_fn, axis, inputs, input_vals): ...@@ -75,3 +76,72 @@ def test_careduce_performance(careduce_fn, numpy_fn, axis, inputs, input_vals):
# FIXME: Why are we asserting >=? Numba could be doing worse than numpy! # FIXME: Why are we asserting >=? Numba could be doing worse than numpy!
assert mean_numba_time / mean_numpy_time >= 0.75 assert mean_numba_time / mean_numpy_time >= 0.75
@pytest.mark.parametrize("cache", (False, True))
def test_radon_model_compile_repeatedly_numba_benchmark(cache, radon_model, benchmark):
joined_inputs, [model_logp, model_dlogp] = radon_model
rng = np.random.default_rng(1)
x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
def compile_and_call_once():
with config.change_flags(numba__cache=cache):
fn = function(
[joined_inputs],
[model_logp, model_dlogp],
mode="NUMBA",
trust_input=True,
)
fn(x)
benchmark.pedantic(compile_and_call_once, rounds=5, iterations=1)
@pytest.mark.parametrize("cache", (False, True))
def test_radon_model_compile_variants_numba_benchmark(
cache, radon_model, radon_model_variants, benchmark
):
"""Test compilation speed when a slightly variant of a function is compiled each time.
This test more realistically simulates a use case where a model is recompiled
multiple times with small changes, such as in an interactive environment.
NOTE: For this test to be meaningful on subsequent runs, the cache must be cleared
"""
joined_inputs, [model_logp, model_dlogp] = radon_model
rng = np.random.default_rng(1)
x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
# Compile base function once to populate the cache
fn = function(
[joined_inputs], [model_logp, model_dlogp], mode="NUMBA", trust_input=True
)
fn(x)
def compile_and_call_once():
with config.change_flags(numba__cache=cache):
for joined_inputs, [model_logp, model_dlogp] in radon_model_variants:
fn = function(
[joined_inputs],
[model_logp, model_dlogp],
mode="NUMBA",
trust_input=True,
)
fn(x)
benchmark.pedantic(compile_and_call_once, rounds=1, iterations=1)
@pytest.mark.parametrize("cache", (False, True))
def test_radon_model_call_numba_benchmark(cache, radon_model, benchmark):
joined_inputs, [model_logp, model_dlogp] = radon_model
with config.change_flags(numba__cache=cache):
fn = function(
[joined_inputs], [model_logp, model_dlogp], mode="NUMBA", trust_input=True
)
rng = np.random.default_rng(1)
x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
fn(x) # warmup
benchmark.pedantic(fn, (x,), rounds=10_000, iterations=10)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论