提交 28521389 authored 作者: Ricardo Vieira's avatar Ricardo Vieira 提交者: ricardoV94

Use asv for benchmarking

上级 0bd33bfc
name: Benchmarks

on:
  push:
    branches:
      - main
      - v3
  pull_request:
    branches:
      - main
      - v3

# One benchmark run per PR head (or per pushed commit); newer runs cancel older ones.
concurrency:
  group: benchmarks-${{ github.event_name == 'pull_request' && github.head_ref || github.sha }}
  cancel-in-progress: true

jobs:
  benchmarks:
    name: "Run benchmarks"
    if: github.event_name == 'push'
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pages: write
      id-token: write
    steps:
      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 # zizmor: ignore[artipacked]
        with:
          fetch-depth: 0
        # persist-credentials is true (default) because this job pushes to asv-results branch
      - name: Set up Python
        uses: mamba-org/setup-micromamba@add3a49764cedee8ee24e82dfde87f5bc2914462 # v2.0.7
        with:
          environment-name: pytensor-bench
          micromamba-version: "1.5.10-0"
          init-shell: bash
          post-cleanup: "all"
          cache-environment: true
          create-args: >-
            -c conda-forge
            python=3.11
            mkl
            numpy
            scipy
            pip
            mkl-service
            cython
            numba>=0.57
            jax
            jaxlib
            asv
      - name: Install dependencies
        shell: micromamba-shell {0}
        run: |
          pip install -e ./
          python -c 'import pytensor; print(pytensor.config.__str__(print_doc=False))'
          python -c 'import pytensor; assert pytensor.config.blas__ldflags != "", "Blas flags are empty"'
      - name: Fetch previous results from asv-results branch
        shell: bash
        run: |
          git fetch origin asv-results:asv-results 2>/dev/null || true
          if git rev-parse --verify asv-results 2>/dev/null; then
            git worktree add /tmp/asv-results asv-results
            if [ -d /tmp/asv-results/results ]; then
              mkdir -p .asv/results
              cp -r /tmp/asv-results/results/* .asv/results/
            fi
            git worktree remove /tmp/asv-results --force
          fi
      - name: Configure ASV machine
        shell: micromamba-shell {0}
        run: asv machine --yes --machine github-actions
      - name: Run benchmarks
        shell: micromamba-shell {0}
        run: |
          export PYTENSOR_FLAGS=warn__ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise,gcc__cxxflags=-pipe
          asv run --python=same --set-commit-hash=$(git rev-parse HEAD) --show-stderr
      - name: Push results to asv-results branch
        shell: bash
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          # Create or update the asv-results branch
          if git rev-parse --verify asv-results 2>/dev/null; then
            git worktree add /tmp/asv-results asv-results
          else
            git worktree add --orphan -b asv-results /tmp/asv-results
            cd /tmp/asv-results
            git rm -rf . 2>/dev/null || true
            cd -
          fi
          mkdir -p /tmp/asv-results/results
          cp -r .asv/results/* /tmp/asv-results/results/
          cd /tmp/asv-results
          git add results/
          git commit -m "Update benchmark results for ${{ github.sha }}" || true
          git push origin asv-results
          cd -
          git worktree remove /tmp/asv-results --force
      - name: Generate HTML
        shell: micromamba-shell {0}
        run: asv publish
      - name: Upload Pages artifact
        uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3.0.1
        with:
          path: .asv/html

  deploy-pages:
    name: "Deploy benchmark dashboard"
    if: github.event_name == 'push'
    needs: benchmarks
    runs-on: ubuntu-latest
    permissions:
      pages: write
      id-token: write
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4.0.5

  benchmarks-pr:
    name: "Benchmark comparison"
    if: github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    permissions:
      pull-requests: write
    steps:
      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: false
      - name: Set up Python
        uses: mamba-org/setup-micromamba@add3a49764cedee8ee24e82dfde87f5bc2914462 # v2.0.7
        with:
          environment-name: pytensor-bench
          micromamba-version: "1.5.10-0"
          init-shell: bash
          post-cleanup: "all"
          cache-environment: true
          create-args: >-
            -c conda-forge
            python=3.11
            mkl
            numpy
            scipy
            pip
            mkl-service
            cython
            numba>=0.57
            jax
            jaxlib
            asv
      - name: Install dependencies
        shell: micromamba-shell {0}
        run: |
          pip install -e ./
      - name: Configure ASV machine
        shell: micromamba-shell {0}
        run: asv machine --yes --machine github-actions
      - name: Run benchmark comparison
        id: bench
        shell: micromamba-shell {0}
        env:
          BASE_REF: ${{ github.base_ref }}
        run: |
          export PYTENSOR_FLAGS=warn__ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise,gcc__cxxflags=-pipe
          HEAD_SHA=$(git rev-parse HEAD)
          BASE_SHA=$(git merge-base "origin/$BASE_REF" HEAD)
          # Benchmark the PR head (already installed)
          asv run --python=same --set-commit-hash="$HEAD_SHA" --show-stderr
          # Checkout base, reinstall, and benchmark
          git checkout "$BASE_SHA"
          pip install -e ./
          asv run --python=same --set-commit-hash="$BASE_SHA" --show-stderr
          # Return to PR head so asv.conf.json is available for compare
          git checkout "$HEAD_SHA"
          # Compare results (only regressions)
          asv compare "$BASE_SHA" "$HEAD_SHA" --factor 1.2 --split --only-changed | tee bench_output.txt
          # Check if there are regressions (lines after "Benchmarks that have got worse:")
          if sed -n '/Benchmarks that have got worse:/,/^$/p' bench_output.txt | grep -qE '\S'; then
            echo "has_regressions=true" >> "$GITHUB_OUTPUT"
          fi
      - name: Post benchmark regressions as PR comment
        if: always() && steps.bench.outcome != 'cancelled' && steps.bench.outputs.has_regressions == 'true'
        uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0
        with:
          script: |
            const fs = require('fs');
            const marker = '<!-- asv-benchmark-results -->';
            let output = '';
            try {
              output = fs.readFileSync('bench_output.txt', 'utf8');
            } catch (e) {
              output = 'Benchmark comparison failed to produce results.';
            }
            const body = `${marker}\n## Benchmark regressions (main vs PR)\n\n\`\`\`\n${output}\n\`\`\`\n\nBenchmarks that regressed by more than 20% are shown.`;
            // Find existing comment to update
            const { data: comments } = await github.rest.issues.listComments({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
            });
            const existing = comments.find(c => c.body.includes(marker));
            if (existing) {
              await github.rest.issues.updateComment({
                comment_id: existing.id,
                owner: context.repo.owner,
                repo: context.repo.repo,
                body: body,
              });
            } else {
              await github.rest.issues.createComment({
                issue_number: context.issue.number,
                owner: context.repo.owner,
                repo: context.repo.repo,
                body: body,
              });
            }
...@@ -40,7 +40,7 @@ jobs: ...@@ -40,7 +40,7 @@ jobs:
- name: "Install dependencies" - name: "Install dependencies"
shell: micromamba-shell {0} shell: micromamba-shell {0}
run: | run: |
micromamba install --yes -q -c conda-forge python=3.13 mkl "numpy>=2.0" scipy pip mkl-service graphviz cython pytest coverage pytest-cov pytest-benchmark pytest-mock pytest-sphinx micromamba install --yes -q -c conda-forge python=3.13 mkl "numpy>=2.0" scipy pip mkl-service graphviz cython pytest coverage pytest-cov pytest-mock pytest-sphinx
micromamba install --yes -q -c conda-forge "numba>=0.57" micromamba install --yes -q -c conda-forge "numba>=0.57"
micromamba install --yes -q -c conda-forge jax jaxlib numpyro equinox micromamba install --yes -q -c conda-forge jax jaxlib numpyro equinox
micromamba install --yes -q -c conda-forge mypy types-setuptools scipy-stubs pandas pre-commit micromamba install --yes -q -c conda-forge mypy types-setuptools scipy-stubs pandas pre-commit
......
...@@ -163,9 +163,9 @@ jobs: ...@@ -163,9 +163,9 @@ jobs:
run: | run: |
if [[ $OS == "macos-15" ]]; then if [[ $OS == "macos-15" ]]; then
micromamba install --yes -q "python~=${PYTHON_VERSION}" numpy "scipy<1.17.0" "numba>=0.63" pip graphviz cython pytest coverage pytest-cov pytest-benchmark pytest-mock pytest-sphinx libblas=*=*accelerate; micromamba install --yes -q "python~=${PYTHON_VERSION}" numpy "scipy<1.17.0" "numba>=0.63" pip graphviz cython pytest coverage pytest-cov pytest-mock pytest-sphinx libblas=*=*accelerate;
else else
micromamba install --yes -q "python~=${PYTHON_VERSION}" numpy "scipy<1.17.0" "numba>=0.63" pip graphviz cython pytest coverage pytest-cov pytest-benchmark pytest-mock pytest-sphinx mkl mkl-service; micromamba install --yes -q "python~=${PYTHON_VERSION}" numpy "scipy<1.17.0" "numba>=0.63" pip graphviz cython pytest coverage pytest-cov pytest-mock pytest-sphinx mkl mkl-service;
fi fi
if [[ $INSTALL_JAX == "1" ]]; then micromamba install --yes -q -c conda-forge "python~=${PYTHON_VERSION}" && pip install "jax>=0.8,<0.9.1" jaxlib numpyro equinox tfp-nightly; fi if [[ $INSTALL_JAX == "1" ]]; then micromamba install --yes -q -c conda-forge "python~=${PYTHON_VERSION}" && pip install "jax>=0.8,<0.9.1" jaxlib numpyro equinox tfp-nightly; fi
if [[ $INSTALL_TORCH == "1" ]]; then micromamba install --yes -q -c conda-forge "python~=${PYTHON_VERSION}" pytorch pytorch-cuda=12.1 "mkl<=2024.0" -c pytorch -c nvidia; fi if [[ $INSTALL_TORCH == "1" ]]; then micromamba install --yes -q -c conda-forge "python~=${PYTHON_VERSION}" pytorch pytorch-cuda=12.1 "mkl<=2024.0" -c pytorch -c nvidia; fi
...@@ -194,7 +194,7 @@ jobs: ...@@ -194,7 +194,7 @@ jobs:
if [[ $DEFAULT_MODE == "FAST_COMPILE" ]]; then export PYTENSOR_FLAGS=$PYTENSOR_FLAGS,mode=FAST_COMPILE; fi if [[ $DEFAULT_MODE == "FAST_COMPILE" ]]; then export PYTENSOR_FLAGS=$PYTENSOR_FLAGS,mode=FAST_COMPILE; fi
if [[ $DEFAULT_MODE == "CVM" ]]; then export PYTENSOR_FLAGS=$PYTENSOR_FLAGS,linker=cvm; fi if [[ $DEFAULT_MODE == "CVM" ]]; then export PYTENSOR_FLAGS=$PYTENSOR_FLAGS,linker=cvm; fi
export PYTENSOR_FLAGS=$PYTENSOR_FLAGS,warn__ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise,gcc__cxxflags=-pipe export PYTENSOR_FLAGS=$PYTENSOR_FLAGS,warn__ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise,gcc__cxxflags=-pipe
python -m pytest -r A --verbose --runslow --durations=50 --cov=pytensor/ --cov-report=xml:coverage/coverage-${MATRIX_ID}.xml --no-cov-on-fail $PART --benchmark-skip python -m pytest -r A --verbose --runslow --durations=50 --cov=pytensor/ --cov-report=xml:coverage/coverage-${MATRIX_ID}.xml --no-cov-on-fail $PART
env: env:
MATRIX_ID: ${{ steps.matrix-id.outputs.id }} MATRIX_ID: ${{ steps.matrix-id.outputs.id }}
MKL_THREADING_LAYER: GNU MKL_THREADING_LAYER: GNU
...@@ -209,60 +209,6 @@ jobs: ...@@ -209,60 +209,6 @@ jobs:
name: coverage-${{ steps.matrix-id.outputs.id }} name: coverage-${{ steps.matrix-id.outputs.id }}
path: coverage/coverage-${{ steps.matrix-id.outputs.id }}.xml path: coverage/coverage-${{ steps.matrix-id.outputs.id }}.xml
benchmarks:
name: "Benchmarks"
needs:
- changes
- style
runs-on: ubuntu-latest
if: ${{ needs.changes.outputs.changes == 'true' && needs.style.result == 'success' }}
strategy:
fail-fast: false
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
fetch-depth: 0
persist-credentials: false
- name: Set up Python 3.11
uses: mamba-org/setup-micromamba@add3a49764cedee8ee24e82dfde87f5bc2914462 # v2.0.7
with:
environment-name: pytensor-test
micromamba-version: "1.5.10-0" # until https://github.com/mamba-org/setup-micromamba/issues/225 is resolved
init-shell: bash
post-cleanup: "all"
- name: Install dependencies
shell: micromamba-shell {0}
run: |
micromamba install --yes -q -c conda-forge "python~=${PYTHON_VERSION}" mkl numpy scipy pip mkl-service cython pytest "numba>=0.57" jax jaxlib pytest-benchmark
pip install -e ./
micromamba list && pip freeze
python -c 'import pytensor; print(pytensor.config.__str__(print_doc=False))'
python -c 'import pytensor; assert pytensor.config.blas__ldflags != "", "Blas flags are empty"'
env:
PYTHON_VERSION: 3.11
- name: Download previous benchmark data
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: ./cache
key: ${{ runner.os }}-benchmark
- name: Run benchmarks
shell: micromamba-shell {0}
run: |
export PYTENSOR_FLAGS=mode=FAST_COMPILE,warn__ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise,gcc__cxxflags=-pipe
python -m pytest --runslow --benchmark-only --benchmark-json output.json
- name: Store benchmark result
uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7
with:
name: Python Benchmark with pytest-benchmark
tool: "pytest"
output-file-path: output.json
external-data-json-path: ./cache/benchmark-data.json
alert-threshold: "200%"
github-token: ${{ secrets.GITHUB_TOKEN }}
comment-on-alert: false
fail-on-alert: false
auto-push: false
all-checks: all-checks:
if: ${{ always() }} if: ${{ always() }}
runs-on: ubuntu-latest runs-on: ubuntu-latest
......
...@@ -49,9 +49,11 @@ core ...@@ -49,9 +49,11 @@ core
.mypy_cache/ .mypy_cache/
/htmlcov/ /htmlcov/
.venv/
pytensor-venv/ pytensor-venv/
/notebooks/Sandbox* /notebooks/Sandbox*
.vscode/ .vscode/
testing-report.html testing-report.html
coverage.xml coverage.xml
.coverage.* .coverage.*
.asv/
{
    "version": 1,
    "project": "pytensor",
    "project_url": "https://github.com/pymc-devs/pytensor",
    "repo": ".",
    "branches": ["HEAD"],
    "dvcs": "git",
    "environment_type": "existing",
    "benchmark_dir": "benchmarks",
    "env_dir": ".asv/env",
    "results_dir": ".asv/results",
    "html_dir": ".asv/html"
}
import numpy as np
import pytensor
from pytensor import grad
from pytensor.tensor.math import log
from pytensor.tensor.nlinalg import diagonal
from pytensor.tensor.signal.conv import convolve1d
from pytensor.tensor.slinalg import cholesky, solve_triangular
from pytensor.tensor.type import dmatrix, tensor
class BatchedMVNormalLogpAndDlogp:
    """Benchmark batched multivariate normal log-probability and its gradient."""

    params = [
        [(), (1000,), (4, 1000)],
        [(), (1000,), (4, 1000)],
    ]
    param_names = ["mu_batch_shape", "cov_batch_shape"]

    def setup(self, mu_batch_shape, cov_batch_shape):
        rng = np.random.default_rng(sum(map(ord, "batched_mvnormal")))
        # The value batches along whichever of mu/cov has the larger batch rank.
        value_batch_shape = (
            cov_batch_shape
            if len(cov_batch_shape) > len(mu_batch_shape)
            else mu_batch_shape
        )
        value = tensor("value", shape=(*value_batch_shape, 10))
        mu = tensor("mu", shape=(*mu_batch_shape, 10))
        cov = tensor("cov", shape=(*cov_batch_shape, 10, 10))
        self.test_values = [
            rng.normal(size=value.type.shape),
            rng.normal(size=mu.type.shape),
            np.eye(cov.type.shape[-1]) * np.abs(rng.normal(size=cov.type.shape)),
        ]
        # Log-density built from the Cholesky factor of the covariance.
        chol_cov = cholesky(cov, lower=True, on_error="raise")
        delta_trans = solve_triangular(chol_cov, value - mu, b_ndim=1)
        quaddist = (delta_trans**2).sum(axis=-1)
        diag = diagonal(chol_cov, axis1=-2, axis2=-1)
        logdet = log(diag).sum(axis=-1)
        k = value.shape[-1]
        norm = -0.5 * k * (np.log(2 * np.pi))
        logp = norm - 0.5 * quaddist - logdet
        dlogp = grad(logp.sum(), wrt=[value, mu, cov])
        self.fn = pytensor.function([value, mu, cov], [logp, *dlogp])

    def time_batched_mvnormal_logp_and_dlogp(self, mu_batch_shape, cov_batch_shape):
        self.fn(*self.test_values)
class SmallBlockwisePerformance:
    """Benchmark small blockwise convolution."""

    def setup(self):
        signal = dmatrix(shape=(7, 128))
        kernel = dmatrix(shape=(7, 20))
        self.fn = pytensor.function(
            [signal, kernel],
            convolve1d(signal, kernel, mode="valid"),
            trust_input=True,
        )
        rng = np.random.default_rng(495)
        self.a_test = rng.normal(size=signal.type.shape)
        self.b_test = rng.normal(size=kernel.type.shape)

    def time_small_blockwise(self):
        self.fn(self.a_test, self.b_test)
import numpy as np
from pytensor import config, function
from pytensor.compile.io import In
from pytensor.tensor.random.basic import normal
from pytensor.tensor.random.type import random_generator_type
from .common import create_radon_model
class MinimalRandomFunctionCall:
    """Benchmark calling a minimal random function."""

    params = [True, False]
    param_names = ["trust_input"]

    def setup(self, trust_input):
        rng_var = random_generator_type()
        draws = normal(rng=rng_var, size=(100,))
        # The RNG input is mutable so the function can update it in place.
        self.f = function([In(rng_var, mutable=True)], draws)
        self.f.trust_input = trust_input
        self.rng_val = np.random.default_rng()

    def time_call(self, trust_input):
        self.f(self.rng_val)
class RadonModelCompileRepeatedly:
    """Benchmark repeated compilation and single call of the radon model."""

    params = ["C", "CVM"]
    param_names = ["mode"]
    number = 1
    repeat = 5

    def setup(self, mode):
        self.joined_inputs, [self.model_logp, self.model_dlogp] = create_radon_model()
        gen = np.random.default_rng(1)
        self.x = gen.normal(size=self.joined_inputs.type.shape).astype(config.floatX)

    def time_compile_and_call(self, mode):
        # Compilation dominates here; a single call is included for realism.
        fn = function(
            [self.joined_inputs],
            [self.model_logp, self.model_dlogp],
            mode=mode,
            trust_input=True,
        )
        fn(self.x)
class RadonModelCompileVariants:
    """Benchmark compiling 8 variants of the radon model."""

    params = ["C", "CVM"]
    param_names = ["mode"]
    number = 1
    repeat = 5

    def setup(self, mode):
        # Build the base model and compile once to populate caches
        self.joined_inputs, [self.model_logp, self.model_dlogp] = create_radon_model()
        gen = np.random.default_rng(1)
        self.x = gen.normal(size=self.joined_inputs.type.shape).astype(config.floatX)
        warmup_fn = function(
            [self.joined_inputs],
            [self.model_logp, self.model_dlogp],
            mode=mode,
            trust_input=True,
        )
        warmup_fn(self.x)
        # Build the 8 variants (2 centerings x 2 intercept dists x 2 sigma dists)
        self.radon_model_variants = [
            create_radon_model(
                intercept_dist=intercept_dist,
                sigma_dist=sigma_dist,
                centered=centered,
            )
            for centered in (True, False)
            for intercept_dist in ("normal", "lognormal")
            for sigma_dist in ("halfnormal", "lognormal")
        ]

    def time_compile_variants(self, mode):
        for joined_inputs, [model_logp, model_dlogp] in self.radon_model_variants:
            fn = function(
                [joined_inputs],
                [model_logp, model_dlogp],
                mode=mode,
                trust_input=True,
            )
            fn(self.x)
class RadonModelCall:
    """Benchmark calling a pre-compiled radon model function."""

    params = ["C", "CVM", "CVM_NOGC"]
    param_names = ["mode"]

    def setup(self, mode):
        joined_inputs, [model_logp, model_dlogp] = create_radon_model()
        # CVM_NOGC is plain CVM with the VM's garbage collection disabled.
        real_mode = "CVM" if mode == "CVM_NOGC" else mode
        self.fn = function(
            [joined_inputs],
            [model_logp, model_dlogp],
            mode=real_mode,
            trust_input=True,
        )
        if mode == "CVM_NOGC":
            self.fn.vm.allow_gc = False
        gen = np.random.default_rng(1)
        self.x = gen.normal(size=joined_inputs.type.shape).astype(config.floatX)
        self.fn(self.x)  # warmup

    def time_call(self, mode):
        self.fn(self.x)
import itertools
import numpy as np
import pytensor
import pytensor.tensor as pt
from pytensor import In, Out, config
from pytensor.gradient import grad
from pytensor.graph.fg import FunctionGraph
from pytensor.tensor.math import add, log
from pytensor.tensor.math import sum as pt_sum
from pytensor.tensor.rewriting.elemwise import FusionOptimizer
from pytensor.tensor.type import dscalar, dvector, tensor3
class DimShuffle:
    """Benchmark DimShuffle operations with various transpositions and expansions."""

    params = [True, False]
    param_names = ["c_contiguous"]

    def setup(self, c_contiguous):
        x = tensor3("x")
        # Non-contiguous case uses a transposed (hence strided) larger array.
        self.x_val = (
            np.random.random((2, 3, 4)).astype(config.floatX)
            if c_contiguous
            else np.random.random((200, 300, 400)).transpose(1, 2, 0)
        )
        ys = [x.transpose(t) for t in itertools.permutations((0, 1, 2))]
        ys += [x[None], x[:, None], x[:, :, None], x[:, :, :, None]]
        self.fn = pytensor.function(
            [In(x, borrow=True)],
            [Out(y, borrow=True) for y in ys],
            mode="FAST_RUN",
        )
        self.fn.trust_input = True
        self.fn(self.x_val)  # warmup / JIT compile

    def time_dimshuffle(self, c_contiguous):
        self.fn(self.x_val)
class CAReduce:
    """Benchmark CAReduce (sum) over various axes and memory layouts."""

    params = [
        [0, 1, 2, (0, 1), (0, 2), (1, 2), None],
        [True, False],
    ]
    param_names = ["axis", "c_contiguous"]

    def setup(self, axis, c_contiguous):
        side = 256
        x_test = np.random.uniform(size=(side, side, side))
        transpose_axis = (0, 1, 2) if c_contiguous else (2, 0, 1)
        x = pytensor.shared(x_test, name="x", shape=x_test.shape)
        self.fn = pytensor.function(
            [], x.transpose(transpose_axis).sum(axis=axis), mode="FAST_RUN"
        )

    def time_careduce(self, axis, c_contiguous):
        self.fn()
class ElemwiseEval:
    """Benchmark evaluation of a fused elemwise logp + gradient computation."""

    def setup(self):
        gen = np.random.default_rng(123)
        n = 100_000
        x = pytensor.shared(gen.normal(size=n), name="x")
        mu = pytensor.shared(gen.normal(size=n), name="mu")
        # Gaussian kernel (up to a constant) and its gradient wrt x.
        logp = -((x - mu) ** 2) / 2
        grad_logp = grad(logp.sum(), x)
        self.func = pytensor.function([], [logp, grad_logp], mode="FAST_RUN")

    def time_eval(self):
        self.func()
class FusionRewrite:
    """Benchmark the FusionOptimizer rewrite pass on different graph shapes."""

    params = [
        ["deep_small_kernels", "large_fuseable_graph"],
        [20, 25],
    ]
    param_names = ["graph_fn", "n"]
    number = 5
    repeat = 7

    @staticmethod
    def large_fuseable_graph(n):
        # Wide graph: n independent fuseable log-density terms summed together.
        factors = []
        sd = dscalar()
        means = dvector()
        cst_05 = pt.constant(0.5)
        cst_m05 = pt.constant(-0.5)
        cst_2 = pt.constant(2)
        cst_m2 = pt.constant(-2)
        ones = pt.constant(np.ones(10))
        for i in range(n):
            factor = cst_m05 * sd**cst_m2 * (ones - means[i]) ** cst_2 + cst_05 * log(
                cst_05 * (sd**cst_m2) / np.pi
            )
            factors.append(pt_sum(factor))
        logp = add(*factors)
        inputs = [sd, means]
        dlogp = [pytensor.grad(logp, inp) for inp in inputs]
        return inputs, dlogp

    @staticmethod
    def deep_small_kernels(n):
        # Deep graph: n chained small elemwise kernels.
        x = pt.matrix("x")
        out = x
        for _ in range(n):
            out = pt.sin(out.T) + pt.cos(out)
        return [x], [out]

    def setup(self, graph_fn, n):
        # asv crosses the parameter lists; only one n is valid per graph builder.
        valid = {
            "deep_small_kernels": 20,
            "large_fuseable_graph": 25,
        }
        if valid.get(graph_fn) != n:
            raise NotImplementedError("Skip non-matching parameter combination")
        inps, outs = getattr(self, graph_fn)(n)
        self.fg = FunctionGraph(inps, outs)
        self.opt = FusionOptimizer()

    def time_rewrite(self, graph_fn, n):
        # Rewrite a fresh clone each repetition so the pass always has work to do.
        self.opt.apply(self.fg.clone())
import numpy as np
import pytensor
from pytensor import function
from pytensor.gradient import jacobian
from pytensor.tensor.math import outer, sqrt
from pytensor.tensor.type import vector
class Jacobian:
    """Benchmark full Jacobian computation."""

    params = [True, False]
    param_names = ["vectorize"]

    def setup(self, vectorize):
        x = vector("x", shape=(3,))
        jac = jacobian(outer(x, x), x, vectorize=vectorize)
        self.fn = function([x], jac, trust_input=True)
        self.x_val = np.array([0, 1, 2], dtype=x.type.dtype)
        self.fn(self.x_val)  # warmup

    def time_jacobian(self, vectorize):
        self.fn(self.x_val)
class PartialJacobian:
    """Benchmark partial Jacobian computation on a large graph."""

    params = [True, False]
    param_names = ["vectorize"]

    def setup(self, vectorize):
        N = 1000
        gen = np.random.default_rng(2025)
        self.x_test = gen.random((N,))
        f_mat = gen.random((N, N))
        x = vector("x", dtype="float64")

        def f(v):
            return sqrt(f_mat @ v / N)

        # Only a 5x5 corner of the full Jacobian is requested.
        partial_jacobian = jacobian(f(x), x, vectorize=vectorize)[:5, :5]
        self.fn = pytensor.function([x], partial_jacobian, trust_input=True)
        self.fn(self.x_test)  # warmup

    def time_partial_jacobian(self, vectorize):
        self.fn(self.x_test)
from pytensor.graph.basic import Apply, Variable
from pytensor.graph.op import Op
from pytensor.graph.traversal import (
apply_ancestors,
toposort,
toposort_with_orderings,
variable_ancestors,
)
from pytensor.graph.type import Type
class MyType(Type):
    """Minimal Type carrying a single payload, used to build benchmark graphs."""

    def __init__(self, thingy):
        self.thingy = thingy

    def filter(self, *args, **kwargs):
        # Values are never materialized in these benchmarks.
        raise NotImplementedError

    def __eq__(self, other):
        return type(self) is type(other) and self.thingy == other.thingy

    def __hash__(self):
        return hash(self.thingy)
def MyVariable(thingy):
    """Create a fresh ownerless Variable of ``MyType(thingy)``."""
    return Variable(MyType(thingy), owner=None, name=f"v{thingy}")
class _MyOp(Op):
    """Dummy Op whose output type sums its inputs' payloads; never executed."""

    __props__ = ()

    def make_node(self, *inputs):
        out_type = MyType(sum(i.type.thingy for i in inputs))
        return Apply(self, list(inputs), [Variable(out_type, owner=None)])

    def perform(self, *args, **kwargs):
        raise NotImplementedError()


# Shared instance used to build the benchmark graphs below.
_my_op = _MyOp()
class Traversal:
    """Benchmark graph traversal operations on a deep graph."""

    params = [
        "variable_ancestors",
        "variable_ancestors_with_blockers",
        "apply_ancestors",
        "apply_ancestors_with_blockers",
        "toposort",
        "toposort_with_blockers",
        "toposort_with_orderings",
        "toposort_with_orderings_and_blockers",
    ]
    param_names = ["func_name"]

    def setup(self, func_name):
        # Build a 50-level-deep graph where each node consumes the previous twice.
        out = MyVariable(1)
        for _ in range(50):
            out = _my_op(out, out)
        self.out = out
        blocker = out.clone()
        funcs = {
            "variable_ancestors": lambda: all(variable_ancestors([self.out])),
            "variable_ancestors_with_blockers": lambda: all(
                variable_ancestors([self.out], blockers=[blocker])
            ),
            "apply_ancestors": lambda: all(apply_ancestors([self.out])),
            "apply_ancestors_with_blockers": lambda: all(
                apply_ancestors([self.out], blockers=[blocker])
            ),
            "toposort": lambda: all(toposort([self.out])),
            "toposort_with_blockers": lambda: all(
                toposort([self.out], blockers=[blocker])
            ),
            "toposort_with_orderings": lambda: all(
                toposort_with_orderings([self.out], orderings={self.out.owner: []})
            ),
            "toposort_with_orderings_and_blockers": lambda: all(
                toposort_with_orderings(
                    [self.out],
                    blockers=[blocker],
                    orderings={self.out.owner: []},
                )
            ),
        }
        self.func = funcs[func_name]

    def time_traversal(self, func_name):
        self.func()
import numpy as np
import pytensor.tensor as pt
from pytensor import function, grad
from pytensor.scan.basic import scan
class JaxLogsumexp:
    """Benchmark JAX logsumexp-like computation."""

    params = [[(10, 10), (1000, 1000)], [0, 1]]
    param_names = ["size", "axis"]

    def setup(self, size, axis):
        try:
            import jax  # noqa: F401
        except ImportError:
            raise NotImplementedError("JAX not available")
        X = pt.matrix("X")
        # Stable logsumexp: subtract the max (zeroed when infinite) before exp.
        X_max = pt.max(X, axis=axis, keepdims=True)
        X_max = pt.switch(pt.isinf(X_max), 0, X_max)
        X_lse = pt.log(pt.sum(pt.exp(X - X_max), axis=axis, keepdims=True)) + X_max
        gen = np.random.default_rng(23920)
        self.X_val = gen.normal(size=size)
        self.fn = function([X], X_lse, mode="JAX")
        self.fn(self.X_val)  # JIT warmup

    def time_logsumexp(self, size, axis):
        self.fn(self.X_val)
class JaxScan:
    """Benchmark JAX scan with forward and backward passes."""

    params = [["forward", "backward", "both"]]
    param_names = ["mode"]

    def setup(self, mode):
        try:
            import jax  # noqa: F401
        except ImportError:
            raise NotImplementedError("JAX not available")
        x0 = pt.vector("x0", shape=(10,), dtype="float64")
        W = pt.matrix("W", shape=(10, 10), dtype="float64")

        def step(x_prev, W):
            return pt.tanh(pt.dot(x_prev, W))

        result = scan(
            fn=step,
            outputs_info=[x0],
            non_sequences=[W],
            n_steps=50,
            return_updates=False,
        )
        loss = result[-1].sum()
        dloss = grad(loss, wrt=[x0, W])
        # Select compiled outputs by mode; anything else gets the combined graph.
        outputs_by_mode = {"forward": result, "backward": dloss}
        self.fn = function(
            [x0, W], outputs_by_mode.get(mode, [loss, *dloss]), mode="JAX"
        )
        gen = np.random.default_rng(42)
        self.x0_val = gen.normal(size=(10,))
        self.W_val = gen.normal(size=(10, 10)) * 0.1
        self.fn(self.x0_val, self.W_val)  # JIT warmup

    def time_scan(self, mode):
        self.fn(self.x0_val, self.W_val)
import numpy as np
import pytensor
import pytensor.tensor as pt
from pytensor import config, function
from pytensor.compile.io import In
from pytensor.tensor.slinalg import cholesky
from pytensor.tensor.type import matrix
def _check_blas_c():
    """Raise ``NotImplementedError`` (asv's skip signal) if C BLAS is unavailable."""
    try:
        from pytensor.tensor.blas_c import CGemv  # noqa: F401
    except ImportError:
        raise NotImplementedError("C BLAS not available")
class GemvVectorDot:
    """Benchmark CGemv used as a vector dot product."""

    def setup(self):
        _check_blas_c()
        from pytensor.tensor.blas_c import CGemv

        n = 400_000
        lhs = pt.vector("A", shape=(n,))
        rhs = pt.vector("x", shape=(n,))
        # Dot product expressed as an inplace gemv on a length-1 output buffer.
        out = CGemv(inplace=True)(pt.empty((1,)), 1.0, lhs[None], rhs, 0.0)
        self.fn = pytensor.function(
            [lhs, rhs], out, accept_inplace=True, trust_input=True
        )
        gen = np.random.default_rng(430)
        self.test_a = gen.normal(size=n)
        self.test_b = gen.normal(size=n)

    def time_gemv_vector_dot(self):
        self.fn(self.test_a, self.test_b)
class GemvNegativeStrides:
    """Benchmark CGemv with negative strides and Fortran layout."""

    params = [[True, False], [True, False], [True, False]]
    param_names = ["neg_stride0", "neg_stride1", "F_layout"]

    def setup(self, neg_stride0, neg_stride1, F_layout):
        _check_blas_c()
        from pytensor.tensor.blas_c import CGemv

        A = pt.matrix("A", shape=(512, 512))
        x = pt.vector("x", shape=(512,))
        y = pt.vector("y", shape=(512,))
        out = CGemv(inplace=False)(y, 1.0, A, x, 1.0)
        self.fn = pytensor.function([A, x, y], out, trust_input=True)
        gen = np.random.default_rng(430)
        test_A = gen.normal(size=(512, 512))
        self.test_x = gen.normal(size=(512,))
        self.test_y = gen.normal(size=(512,))
        # Apply the requested layout first, then flip strides via slicing.
        if F_layout:
            test_A = np.asfortranarray(test_A)
        if neg_stride0:
            test_A = test_A[::-1]
        if neg_stride1:
            test_A = test_A[:, ::-1]
        self.test_A = test_A

    def time_gemv_negative_strides(self, neg_stride0, neg_stride1, F_layout):
        self.fn(self.test_A, self.test_x, self.test_y)
class Ger:
    """Benchmark general rank-1 update (ger)."""

    params = [[2**7, 2**9, 2**13], [True, False]]
    param_names = ["n", "inplace"]

    def setup(self, n, inplace):
        alpha = pt.dscalar("alpha")
        x = pt.dvector("x")
        y = pt.dvector("y")
        A = pt.dmatrix("A")
        # alpha * x yT + A — the classic ger update.
        out = alpha * pt.outer(x, y) + A
        self.fn = pytensor.function(
            [alpha, x, y, In(A, mutable=inplace)], out, trust_input=True
        )
        gen = np.random.default_rng([2274, n])
        self.alpha_test = gen.normal(size=())
        self.x_test = gen.normal(size=(n,))
        self.y_test = gen.normal(size=(n,))
        self.A_test = gen.normal(size=(n, n))

    def time_ger(self, n, inplace):
        self.fn(self.alpha_test, self.x_test, self.y_test, self.A_test)
class Cholesky:
    """Benchmark Cholesky decomposition."""

    def setup(self):
        gen = np.random.default_rng(1234)
        r = gen.standard_normal((10, 10)).astype(config.floatX)
        # r @ r.T is symmetric positive (semi-)definite by construction.
        self.pd = np.dot(r, r.T)
        x = matrix()
        self.fn = function([x], cholesky(x))

    def time_cholesky(self):
        self.fn(self.pd)
import numpy as np
import pytensor
import pytensor.tensor as pt
from pytensor.gradient import grad
from pytensor.tensor.math import gammaincc
from pytensor.tensor.type import vector
class GammainccdkGrad:
    """Benchmark gradient of gammaincc with respect to k."""

    def setup(self):
        k = vector("k")
        x = vector("x")
        out = gammaincc(k, x)
        self.grad_fn = pytensor.function(
            [k, x], grad(out.sum(), wrt=[k]), mode="FAST_RUN", trust_input=True
        )
        self.vals = [
            np.full((1000,), 3.2, dtype=k.dtype),
            np.full((1000,), 0.01, dtype=x.dtype),
        ]
        self.grad_fn(*self.vals)  # warmup

    def time_gammaincc_grad(self):
        self.grad_fn(*self.vals)
class Hyp2F1Grad:
    """Benchmark gradient of hyp2f1 with few and many iterations."""

    params = [["few_iters", "many_iters"], ["a", "all"]]
    param_names = ["case", "wrt"]

    # Evaluation points as (a1, a2, b1, z).
    _cases = {
        "few_iters": (10.0, -2.0, 7.0, 0.7),
        "many_iters": (3.5, 1.1, 2.0, 0.3),
    }

    def setup(self, case, wrt):
        a1, a2, b1, z = pt.scalars("a1", "a2", "b1", "z")
        hyp2f1_out = pt.hyp2f1(a1, a2, b1, z)
        wrt_vars = a1 if wrt == "a" else [a1, a2, b1, z]
        hyp2f1_grad = pt.grad(hyp2f1_out, wrt=wrt_vars)
        self.f_grad = pytensor.function([a1, a2, b1, z], hyp2f1_grad, trust_input=True)
        self.test_vals = [np.array(v) for v in self._cases[case]]
        self.f_grad(*self.test_vals)  # warmup

    def time_hyp2f1_grad(self, case, wrt):
        self.f_grad(*self.test_vals)
差异被折叠。
import numpy as np
import pytensor
import pytensor.tensor as pt
from pytensor.compile.mode import get_default_mode
from pytensor.tensor.type import tensor
class BlockDiagDot:
    """Benchmark block_diag @ vector with and without the rewrite optimization."""

    params = [[10, 100, 1000], [True, False]]
    param_names = ["size", "rewrite"]

    def setup(self, size, rewrite):
        gen = np.random.default_rng(sum(ord(c) for c in f"{size}_{rewrite}"))
        # Split `size` into three randomly sized diagonal blocks.
        a_size = int(gen.uniform(1, int(0.8 * size)))
        b_size = int(gen.uniform(1, int(0.8 * (size - a_size))))
        c_size = size - a_size - b_size
        a = tensor("a", shape=(a_size, a_size))
        b = tensor("b", shape=(b_size, b_size))
        c = tensor("c", shape=(c_size, c_size))
        d = tensor("d", shape=(size,))
        out = pt.linalg.block_diag(a, b, c) @ d
        mode = get_default_mode()
        if not rewrite:
            mode = mode.excluding("local_block_diag_dot_to_dot_block_diag")
        self.fn = pytensor.function([a, b, c, d], out, mode=mode)
        self.a_val = gen.normal(size=a.type.shape).astype(a.type.dtype)
        self.b_val = gen.normal(size=b.type.shape).astype(b.type.dtype)
        self.c_val = gen.normal(size=c.type.shape).astype(c.type.dtype)
        self.d_val = gen.normal(size=d.type.shape).astype(d.type.dtype)
        self.fn(self.a_val, self.b_val, self.c_val, self.d_val)  # warmup

    def time_block_diag_dot(self, size, rewrite):
        self.fn(self.a_val, self.b_val, self.c_val, self.d_val)
import numpy as np
import pytensor.tensor as pt
from pytensor import config, function, grad, shared
from pytensor.compile.mode import Mode
from pytensor.gradient import hessian
from pytensor.scan.basic import scan
from pytensor.tensor.math import dot
from pytensor.tensor.type import (
dmatrix,
dscalar,
dvector,
fvector,
iscalar,
matrix,
vector,
)
class CythonPerformance:
    """Time a simple scan executed through the CVM linker (cython scan_perform)."""

    def setup(self):
        n_rows = 200
        coefs = -1 / np.arange(1, 11).astype(config.floatX)
        data = np.arange(n_rows * 10).astype(config.floatX).reshape(n_rows, 10)
        s_r = pt.as_tensor_variable(data, dtype=config.floatX)
        s_y = scan(
            fn=lambda ri, rii, M: ri + M * rii,
            sequences=[s_r[1:]],
            non_sequences=[pt.as_tensor_variable(coefs, dtype=config.floatX)],
            outputs_info=s_r[0],
            mode=Mode(linker="cvm", optimizer="fast_run"),
            return_updates=False,
        )
        self.f_cvm = function([], s_y, mode="FAST_RUN")
        self.f_cvm.trust_input = True

    def time_cython_scan(self):
        self.f_cvm()
class Reordering:
    """Time an RNN scan whose step returns outputs in a shuffled order."""

    def setup(self):
        rng = np.random.default_rng(1234)
        floatX = config.floatX
        w_in2_val = rng.uniform(-0.5, 0.5, size=(2,)).astype(floatX)
        w_val = rng.uniform(-0.5, 0.5, size=(2, 2)).astype(floatX)
        w_out_val = rng.uniform(-0.5, 0.5, size=(2,)).astype(floatX)
        self.vW_in1 = rng.uniform(-0.5, 0.5, size=(2, 2)).astype(floatX)
        self.v_u1 = rng.uniform(-0.5, 0.5, size=(3, 2)).astype(floatX)
        self.v_u2 = rng.uniform(-0.5, 0.5, size=(3,)).astype(floatX)
        self.v_x0 = rng.uniform(-0.5, 0.5, size=(2,)).astype(floatX)
        self.v_y0 = rng.uniform(size=(3,)).astype(floatX)

        W_in2 = shared(w_in2_val, name="win2")
        W = shared(w_val, name="w")
        W_out = shared(w_out_val, name="wout")
        W_in1 = matrix("win")
        u1, u2 = matrix("u1"), vector("u2")
        x0, y0 = vector("x0"), vector("y0")

        def step(u1_t, u2_t, x_tm1, y_tm1, y_tm3, W_in1):
            # Two non-recurrent outputs precede the recurrent states,
            # which is what the "reordering" in the class name refers to.
            return [
                y_tm3 + 1,
                y_tm3 + 2,
                dot(u1_t, W_in1) + u2_t * W_in2 + dot(x_tm1, W),
                y_tm1 + dot(x_tm1, W_out),
            ]

        outputs = scan(
            step,
            [u1, u2],
            [None, None, x0, dict(initial=y0, taps=[-1, -3])],
            W_in1,
            n_steps=None,
            truncate_gradient=-1,
            go_backwards=False,
            return_updates=False,
        )
        self.f = function([u1, u2, x0, y0, W_in1], outputs, allow_input_downcast=True)

    def time_reordering(self):
        self.f(self.v_u1, self.v_u2, self.v_x0, self.v_y0, self.vW_in1)
class ScanAsTensorOnGradients:
    """Time the compilation of a gradient taken through a scan."""

    # Compilation is expensive: one call per sample, five samples.
    number = 1
    repeat = 5

    def setup(self):
        to_scan = dvector("to_scan")
        seq = dmatrix("seq")
        f1 = dscalar("f1")

        def step(prev, seq, f1):
            return prev + f1 * seq

        scanned = scan(
            fn=step,
            sequences=[seq],
            outputs_info=[to_scan],
            non_sequences=[f1],
            return_updates=False,
        )
        # The result of this compile is discarded — presumably it warms the
        # compilation cache before the timed compile below; confirm if changed.
        function(
            inputs=[to_scan, seq, f1],
            outputs=scanned,
            allow_input_downcast=True,
        )
        self.t_grad = grad(scanned.sum(), wrt=[to_scan, f1], consider_constant=[seq])
        self.inputs = [to_scan, seq, f1]

    def time_compile_grad(self):
        function(
            inputs=self.inputs,
            outputs=self.t_grad,
            allow_input_downcast=True,
        )
class HessianBugGradGradTwoScans:
    """Time a hessian computed through two nested scans."""

    def setup(self):
        W = fvector(name="W")
        n_steps = iscalar(name="Nb_steps")

        def float32_zero():
            # Initial accumulator value shared by both scan loops.
            return pt.as_tensor_variable(np.asarray(0, dtype=np.float32))

        def loss_outer(sum_outer, W):
            def loss_inner(sum_inner, W):
                return sum_inner + (W**2).sum()

            inner_result = scan(
                fn=loss_inner,
                outputs_info=float32_zero(),
                non_sequences=[W],
                n_steps=1,
                return_updates=False,
            )
            return sum_outer + inner_result[-1]

        outer_result = scan(
            fn=loss_outer,
            outputs_info=float32_zero(),
            non_sequences=[W],
            n_steps=n_steps,
            return_list=True,
            return_updates=False,
        )
        cost = outer_result[0][-1]
        self.f = function([W, n_steps], hessian(cost, W))

    def time_hessian_two_scans(self):
        self.f(np.ones((8,), dtype="float32"), 1)
class MultipleOutsTaps:
    """Time an RNN scan mixing sequence taps, state taps, and plain outputs."""

    def setup(self):
        seq_len = 5
        rng = np.random.default_rng(1234)
        floatX = config.floatX
        w_in2_val = rng.uniform(-2.0, 2.0, size=(2,)).astype(floatX)
        w_val = rng.uniform(-2.0, 2.0, size=(2, 2)).astype(floatX)
        w_out_val = rng.uniform(-2.0, 2.0, size=(2,)).astype(floatX)
        self.vW_in1 = rng.uniform(-2.0, 2.0, size=(2, 2)).astype(floatX)
        self.v_u1 = rng.uniform(-2.0, 2.0, size=(seq_len, 2)).astype(floatX)
        # u2 needs two extra rows for its [-1, 0, 1] taps.
        self.v_u2 = rng.uniform(-2.0, 2.0, size=(seq_len + 2, 2)).astype(floatX)
        self.v_x0 = rng.uniform(-2.0, 2.0, size=(2,)).astype(floatX)
        self.v_y0 = rng.uniform(size=(3,)).astype(floatX)

        W_in2 = shared(w_in2_val, name="win2")
        W = shared(w_val, name="w")
        W_out = shared(w_out_val, name="wout")
        W_in1 = matrix("win")
        u1, u2 = matrix("u1"), matrix("u2")
        x0, y0 = vector("x0"), vector("y0")

        def step(u1_t, u2_tm1, u2_t, u2_tp1, x_tm1, y_tm1, y_tm3, W_in1):
            return [
                dot(u1_t, W_in1) + (u2_t + u2_tm1 * u2_tp1) * W_in2 + dot(x_tm1, W),
                (y_tm1 + y_tm3) * dot(x_tm1, W_out),
                dot(u1_t, W_in1),
            ]

        outputs = scan(
            step,
            [u1, dict(input=u2, taps=[-1, 0, 1])],
            [x0, dict(initial=y0, taps=[-1, -3]), None],
            W_in1,
            n_steps=None,
            truncate_gradient=-1,
            go_backwards=False,
            return_updates=False,
        )
        self.f = function([u1, u2, x0, y0, W_in1], outputs, allow_input_downcast=True)

    def time_multiple_outs_taps(self):
        self.f(self.v_u1, self.v_u2, self.v_x0, self.v_y0, self.vW_in1)
class PregreedyOptimizer:
    """Time a chained-dot scan that exercises the pregreedy optimizer path."""

    def setup(self):
        W = pt.zeros((5, 4))
        bv = pt.zeros((5,))
        bh = pt.zeros((4,))
        v = matrix("v")

        # First scan only broadcasts the bias vectors over the sequence.
        bv_t, bh_t = scan(
            lambda _: [bv, bh],
            sequences=v,
            outputs_info=[None, None],
            return_updates=False,
        )
        # Second scan chains two matrix products per step.
        chain = scan(
            lambda x: dot(dot(x, W) + bh_t, W.T) + bv_t,
            outputs_info=v,
            n_steps=2,
            return_updates=False,
        )
        self.chain_fn = function([v], chain)
        self.v_data = np.zeros((3, 5), dtype=config.floatX)

    def time_pregreedy_optimizer(self):
        self.chain_fn(self.v_data)
class SavememOpt:
    """Time a scan that triggers the save_mem optimization."""

    def setup(self):
        y0 = shared(np.ones((2, 10)))
        _y1, y2 = scan(
            lambda y: [y, y],
            outputs_info=[dict(initial=y0, taps=[-2]), None],
            n_steps=5,
            return_updates=False,
        )
        self.fn = function([], y2.sum())

    def time_savemem_opt(self):
        self.fn()
import numpy as np
import pytensor
from pytensor import config
from pytensor.compile.io import In, Out
from pytensor.tensor.type import tensor3
class Reshape:
    """Time reshaping a tensor3 into three different output shapes in one call."""

    def setup(self):
        x = tensor3("x")
        # Use a seeded Generator for reproducible inputs, consistent with the
        # other benchmarks in this suite (the original used unseeded
        # np.random.random, making input data differ between runs).
        rng = np.random.default_rng(2304)
        self.x_val = rng.random((2, 3, 4)).astype(config.floatX)
        y1 = x.reshape((6, 4))
        y2 = x.reshape((2, 12))
        y3 = x.reshape((-1,))
        # borrow=True on inputs and outputs avoids copies, so only the
        # Reshape ops themselves are timed.
        self.reshape_fn = pytensor.function(
            [In(x, borrow=True)],
            [Out(y1, borrow=True), Out(y2, borrow=True), Out(y3, borrow=True)],
        )
        self.reshape_fn.trust_input = True
        # Warmup
        self.reshape_fn(self.x_val)

    def time_reshape(self):
        self.reshape_fn(self.x_val)
import numpy as np
import pytensor
from pytensor.gradient import grad
from pytensor.tensor.signal.conv import convolve1d
from pytensor.tensor.type import tensor
class Convolve1dGrad:
    """Time the gradient of convolve1d w.r.t. its smaller input."""

    params = ["full", "valid"]
    param_names = ["convolve_mode"]

    def setup(self, convolve_mode):
        larger = tensor("larger", shape=(8, None))
        smaller = tensor("smaller", shape=(8, None))
        cost = convolve1d(larger, smaller, mode=convolve_mode).sum()
        self.fn = pytensor.function(
            [larger, smaller], grad(cost, wrt=smaller), trust_input=True
        )
        # Seed entropy includes the mode so each parametrization gets its own data.
        rng = np.random.default_rng([119, convolve_mode == "full"])
        self.test_larger = rng.normal(size=(8, 1024)).astype(larger.type.dtype)
        self.test_smaller = rng.normal(size=(8, 16)).astype(smaller.type.dtype)
        # Warmup
        self.fn(self.test_larger, self.test_smaller)

    def time_convolve1d_grad(self, convolve_mode):
        self.fn(self.test_larger, self.test_smaller)
import numpy as np
import pytensor
import pytensor.tensor.basic as ptb
from pytensor import Out
from pytensor.tensor.basic import join
from pytensor.tensor.subtensor import inc_subtensor, set_subtensor
from pytensor.tensor.type import matrices, vector, vectors
class AdvancedSubtensor1:
    """Time integer-array indexing (AdvancedSubtensor1) on a vector."""

    params = [[True, False], [True, False]]
    param_names = ["static_shape", "gc"]

    def setup(self, static_shape, gc):
        x = vector("x", shape=(85 if static_shape else None,))
        # Each of the 85 positions is selected 11 times.
        idxs_values = np.arange(85).repeat(11)
        out = x[idxs_values]
        self.fn = pytensor.function(
            [x],
            Out(out, borrow=True),
            on_unused_input="ignore",
            trust_input=True,
        )
        self.fn.vm.allow_gc = gc
        self.x_values = np.random.normal(size=(85,)).astype(x.type.dtype)
        # Warmup
        self.fn(self.x_values)

    def time_advanced_subtensor1(self, static_shape, gc):
        self.fn(self.x_values)
class AdvancedIncSubtensor1:
    """Time inc_subtensor/set_subtensor with repeated integer indices."""

    params = [["inc_subtensor", "set_subtensor"], [True, False], [True, False]]
    param_names = ["func", "static_shape", "gc"]

    def setup(self, func, static_shape, gc):
        subtensor_func = {
            "inc_subtensor": inc_subtensor,
            "set_subtensor": set_subtensor,
        }[func]
        x = vector("x", shape=(85 if static_shape else None,))
        buffer = ptb.zeros_like(x)
        y_values = np.random.normal(size=(85 * 11,)).astype(x.type.dtype)
        idxs_values = np.arange(85).repeat(11)
        # Same update applied through forward and reversed index orders.
        out1 = subtensor_func(buffer[idxs_values], y_values)
        out2 = subtensor_func(buffer[idxs_values[::-1]], y_values)
        self.fn = pytensor.function(
            [x],
            [Out(out1, borrow=True), Out(out2, borrow=True)],
            on_unused_input="ignore",
            trust_input=True,
        )
        self.fn.vm.allow_gc = gc
        self.x_values = np.zeros((85,), dtype=x.type.dtype)
        # Warmup
        self.fn(self.x_values)

    def time_advanced_incsubtensor1(self, func, static_shape, gc):
        self.fn(self.x_values)
class JoinPerformance:
    """Time join (concatenation) across ndim, axis, memory layout, and gc."""

    params = [[1, 2], [0, 1], ["C", "F", "Mixed"], [True, False]]
    param_names = ["ndim", "axis", "memory_layout", "gc"]

    def setup(self, ndim, axis, memory_layout, gc):
        # The 1-D case only makes sense for axis 0 with the default layout.
        if ndim == 1 and not (memory_layout == "C" and axis == 0):
            raise NotImplementedError("Skip invalid combination")

        n = 64
        inputs = vectors("abcdef") if ndim == 1 else matrices("abcdef")
        out = join(axis, *inputs)
        self.fn = pytensor.function(inputs, Out(out, borrow=True), trust_input=True)
        self.fn.vm.allow_gc = gc

        test_values = [np.zeros((n, n)[:ndim], dtype=inputs[0].dtype) for _ in inputs]
        if memory_layout == "F":
            test_values = [np.asfortranarray(t) for t in test_values]
        elif memory_layout == "Mixed":
            # Alternate C-ordered and Fortran-ordered inputs.
            test_values = [
                np.asfortranarray(t) if i % 2 else t
                for i, t in enumerate(test_values)
            ]
        self.test_values = test_values
        # Warmup
        self.fn(*self.test_values)

    def time_join(self, ndim, axis, memory_layout, gc):
        self.fn(*self.test_values)
"""Common utilities for ASV benchmarks."""
import sys
from pathlib import Path
# ASV doesn't add the repo root to sys.path, so `tests` isn't importable.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "tests"))
from fixtures import create_radon_model
__all__ = ["create_radon_model"]
Benchmarking
============
PyTensor uses `airspeed velocity (ASV) <https://asv.readthedocs.io/>`_ for
performance benchmarking. Benchmarks are stored in the ``benchmarks/`` directory
and track performance across commits over time.
A `dashboard <https://pymc-devs.github.io/pytensor/>`_ is automatically updated
on each push to ``main``.
Quick start
-----------
Install ASV::
pip install asv virtualenv
Or with the benchmark extra::
pip install -e ".[benchmark]"
Running benchmarks
------------------
Run all benchmarks against your current working tree::
asv run --python=same --quick
The ``--python=same`` flag uses your current Python environment instead of
creating a new virtual environment. The ``--quick`` flag runs each benchmark
only once for a fast (but noisier) result.
For more accurate results, drop ``--quick``::
asv run --python=same
Run a specific benchmark module or class::
asv run --python=same --bench bench_compile
asv run --python=same --bench "bench_elemwise.CAReduce"
Run benchmarks matching a pattern::
asv run --python=same --bench ".*Numba.*"
Comparing branches
------------------
Compare the current branch against ``main``::
asv continuous --python=same main HEAD
This runs benchmarks on both commits and reports any regressions or
improvements. Use ``--factor`` to set the threshold for flagging changes::
asv continuous --python=same --factor 1.1 main HEAD
This flags benchmarks that changed by more than 10%.
Viewing results
---------------
Generate the HTML dashboard and open it in a browser::
asv publish
asv preview
This starts a local web server (typically at ``http://127.0.0.1:8080``) where
you can explore benchmark results interactively.
Profiling
---------
Profile a specific benchmark to identify bottlenecks::
asv profile bench_compile.RadonModelCall.time_call --python=same
This runs the benchmark under cProfile and displays the results.
Writing benchmarks
------------------
Benchmarks live in ``benchmarks/`` as Python files prefixed with ``bench_``.
Each file contains classes with:
- A ``setup()`` method for initialization (compilation, data generation).
This is **not** timed.
- Methods prefixed with ``time_`` that contain **only** the code to benchmark.
- ``params`` and ``param_names`` class attributes for parametrization.
Example::
import numpy as np
import pytensor
import pytensor.tensor as pt
class MyBenchmark:
params = [[10, 100, 1000]]
param_names = ["size"]
def setup(self, size):
x = pt.vector("x", shape=(size,))
self.fn = pytensor.function([x], pt.exp(x), trust_input=True)
self.x_val = np.random.normal(size=size)
self.fn(self.x_val) # warmup / JIT compile
def time_exp(self, size):
self.fn(self.x_val)
For benchmarks that require optional backends (Numba, JAX), raise
``NotImplementedError`` in ``setup()`` if the backend is not available::
def setup(self, ...):
try:
import numba # noqa: F401
except ImportError:
raise NotImplementedError("Numba not available")
See the `ASV documentation <https://asv.readthedocs.io/en/stable/writing_benchmarks.html>`_
for more details on writing benchmarks.
CI integration
--------------
Benchmarks run automatically in GitHub Actions:
- **On push to main**: Full benchmark suite runs and results are published to
the dashboard on GitHub Pages. Historical results are stored on the
``asv-results`` branch.
- **On pull requests**: ``asv continuous`` compares benchmarks between ``main``
and the PR head. Results are posted as a PR comment, flagging any benchmarks
that regressed by more than 10%.
...@@ -28,7 +28,6 @@ dependencies: ...@@ -28,7 +28,6 @@ dependencies:
- pytest - pytest
- pytest-cov - pytest-cov
- pytest-xdist - pytest-xdist
- pytest-benchmark
- pytest-mock - pytest-mock
- pytest-sphinx - pytest-sphinx
# For building docs # For building docs
......
...@@ -30,7 +30,6 @@ dependencies: ...@@ -30,7 +30,6 @@ dependencies:
- pytest - pytest
- pytest-cov - pytest-cov
- pytest-xdist - pytest-xdist
- pytest-benchmark
- pytest-mock - pytest-mock
- pytest-sphinx - pytest-sphinx
# For building docs # For building docs
......
...@@ -75,13 +75,13 @@ tests = [ ...@@ -75,13 +75,13 @@ tests = [
"pre-commit", "pre-commit",
"pytest-cov>=2.6.1", "pytest-cov>=2.6.1",
"coverage>=5.1", "coverage>=5.1",
"pytest-benchmark",
"pytest-mock", "pytest-mock",
"pytest-sphinx", "pytest-sphinx",
] ]
rtd = ["sphinx>=5.1.0,<6", "pygments", "pydot"] rtd = ["sphinx>=5.1.0,<6", "pygments", "pydot"]
jax = ["jax", "jaxlib"] jax = ["jax", "jaxlib"]
numba = ["numba>=0.57", "llvmlite"] numba = ["numba>=0.57", "llvmlite"]
benchmark = ["asv", "virtualenv"]
[tool.setuptools.packages.find] [tool.setuptools.packages.find]
include = ["pytensor*"] include = ["pytensor*"]
......
...@@ -20,8 +20,6 @@ from pytensor.link.vm import VMLinker ...@@ -20,8 +20,6 @@ from pytensor.link.vm import VMLinker
from pytensor.printing import debugprint from pytensor.printing import debugprint
from pytensor.tensor.math import dot, tanh from pytensor.tensor.math import dot, tanh
from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.math import sum as pt_sum
from pytensor.tensor.random import normal
from pytensor.tensor.random.type import random_generator_type
from pytensor.tensor.type import ( from pytensor.tensor.type import (
dmatrix, dmatrix,
dscalar, dscalar,
...@@ -34,7 +32,6 @@ from pytensor.tensor.type import ( ...@@ -34,7 +32,6 @@ from pytensor.tensor.type import (
scalars, scalars,
vector, vector,
) )
from tests.fixtures import * # noqa: F403
pytestmark = pytest.mark.filterwarnings( pytestmark = pytest.mark.filterwarnings(
...@@ -1355,79 +1352,3 @@ def test_empty_givens_updates(): ...@@ -1355,79 +1352,3 @@ def test_empty_givens_updates():
y = x * 2 y = x * 2
function([In(x)], y, givens={}) function([In(x)], y, givens={})
function([In(x)], y, updates={}) function([In(x)], y, updates={})
@pytest.mark.parametrize("trust_input", [True, False])
def test_minimal_random_function_call_benchmark(trust_input, benchmark):
rng = random_generator_type()
x = normal(rng=rng, size=(100,))
f = function([In(rng, mutable=True)], x)
f.trust_input = trust_input
rng_val = np.random.default_rng()
benchmark(f, rng_val)
@pytest.mark.parametrize("mode", ["C", "CVM"])
def test_radon_model_compile_repeatedly_benchmark(mode, radon_model, benchmark):
joined_inputs, [model_logp, model_dlogp] = radon_model
rng = np.random.default_rng(1)
x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
def compile_and_call_once():
fn = function(
[joined_inputs], [model_logp, model_dlogp], mode=mode, trust_input=True
)
fn(x)
benchmark.pedantic(compile_and_call_once, rounds=5, iterations=1)
@pytest.mark.parametrize("mode", ["C", "CVM"])
def test_radon_model_compile_variants_benchmark(
mode, radon_model, radon_model_variants, benchmark
):
"""Test compilation speed when a slightly variant of a function is compiled each time.
This test more realistically simulates a use case where a model is recompiled
multiple times with small changes, such as in an interactive environment.
NOTE: For this test to be meaningful on subsequent runs, the cache must be cleared
"""
joined_inputs, [model_logp, model_dlogp] = radon_model
rng = np.random.default_rng(1)
x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
# Compile base function once to populate the cache
fn = function(
[joined_inputs], [model_logp, model_dlogp], mode=mode, trust_input=True
)
fn(x)
def compile_and_call_once():
for joined_inputs, [model_logp, model_dlogp] in radon_model_variants:
fn = function(
[joined_inputs], [model_logp, model_dlogp], mode=mode, trust_input=True
)
fn(x)
benchmark.pedantic(compile_and_call_once, rounds=1, iterations=1)
@pytest.mark.parametrize("mode", ["C", "CVM", "CVM_NOGC"])
def test_radon_model_call_benchmark(mode, radon_model, benchmark):
joined_inputs, [model_logp, model_dlogp] = radon_model
real_mode = "CVM" if mode == "CVM_NOGC" else mode
fn = function(
[joined_inputs], [model_logp, model_dlogp], mode=real_mode, trust_input=True
)
if mode == "CVM_NOGC":
fn.vm.allow_gc = False
rng = np.random.default_rng(1)
x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
fn(x) # warmup
benchmark(fn, x)
import numpy as np import numpy as np
import pytest
import pytensor.tensor as pt import pytensor.tensor as pt
from pytensor.graph.replace import graph_replace from pytensor.graph.replace import graph_replace
...@@ -134,23 +133,3 @@ def create_radon_model( ...@@ -134,23 +133,3 @@ def create_radon_model(
model_logp, model_dlogp = graph_replace([model_logp, model_dlogp], replacement) model_logp, model_dlogp = graph_replace([model_logp, model_dlogp], replacement)
return joined_inputs, [model_logp, model_dlogp] return joined_inputs, [model_logp, model_dlogp]
@pytest.fixture(scope="session")
def radon_model():
return create_radon_model()
@pytest.fixture(scope="session")
def radon_model_variants():
# Convert to list comp
return [
create_radon_model(
intercept_dist=intercept_dist,
sigma_dist=sigma_dist,
centered=centered,
)
for centered in (True, False)
for intercept_dist in ("normal", "lognormal")
for sigma_dist in ("halfnormal", "lognormal")
]
...@@ -4,17 +4,13 @@ from pytensor import Variable, shared ...@@ -4,17 +4,13 @@ from pytensor import Variable, shared
from pytensor import tensor as pt from pytensor import tensor as pt
from pytensor.graph import Apply, ancestors, graph_inputs from pytensor.graph import Apply, ancestors, graph_inputs
from pytensor.graph.traversal import ( from pytensor.graph.traversal import (
apply_ancestors,
apply_depends_on, apply_depends_on,
explicit_graph_inputs, explicit_graph_inputs,
general_toposort, general_toposort,
get_var_by_name, get_var_by_name,
io_toposort, io_toposort,
orphans_between, orphans_between,
toposort,
toposort_with_orderings,
truncated_graph_inputs, truncated_graph_inputs,
variable_ancestors,
variable_depends_on, variable_depends_on,
vars_between, vars_between,
walk, walk,
...@@ -406,37 +402,3 @@ def test_get_var_by_name(): ...@@ -406,37 +402,3 @@ def test_get_var_by_name():
exp_res = igo.fgraph.outputs[0] exp_res = igo.fgraph.outputs[0]
assert res == exp_res assert res == exp_res
@pytest.mark.parametrize(
"func",
[
lambda x: all(variable_ancestors([x])),
lambda x: all(variable_ancestors([x], blockers=[x.clone()])),
lambda x: all(apply_ancestors([x])),
lambda x: all(apply_ancestors([x], blockers=[x.clone()])),
lambda x: all(toposort([x])),
lambda x: all(toposort([x], blockers=[x.clone()])),
lambda x: all(toposort_with_orderings([x], orderings={x: []})),
lambda x: all(
toposort_with_orderings([x], blockers=[x.clone()], orderings={x: []})
),
],
ids=[
"variable_ancestors",
"variable_ancestors_with_blockers",
"apply_ancestors",
"apply_ancestors_with_blockers)",
"toposort",
"toposort_with_blockers",
"toposort_with_orderings",
"toposort_with_orderings_and_blockers",
],
)
def test_traversal_benchmark(func, benchmark):
r1 = MyVariable(1)
out = r1
for i in range(50):
out = MyOp(out, out)
benchmark(func, out)
import numpy as np import numpy as np
import pytest import pytest
import scipy.special
import pytensor
import pytensor.tensor as pt import pytensor.tensor as pt
from pytensor.compile import get_mode from pytensor.compile import get_mode
from pytensor.configdefaults import config from pytensor.configdefaults import config
...@@ -98,28 +96,6 @@ def test_softmax_grad(axis): ...@@ -98,28 +96,6 @@ def test_softmax_grad(axis):
compare_jax_and_py([dy, sm], [out], [dy_test_value, sm_test_value]) compare_jax_and_py([dy, sm], [out], [dy_test_value, sm_test_value])
@pytest.mark.parametrize("size", [(10, 10), (1000, 1000)])
@pytest.mark.parametrize("axis", [0, 1])
def test_logsumexp_benchmark(size, axis, benchmark):
X = pt.matrix("X")
X_max = pt.max(X, axis=axis, keepdims=True)
X_max = pt.switch(pt.isinf(X_max), 0, X_max)
X_lse = pt.log(pt.sum(pt.exp(X - X_max), axis=axis, keepdims=True)) + X_max
rng = np.random.default_rng(23920)
X_val = rng.normal(size=size)
X_lse_fn = pytensor.function([X], X_lse, mode="JAX")
# JIT compile first
_ = X_lse_fn(X_val)
res = benchmark(X_lse_fn, X_val)
exp_res = scipy.special.logsumexp(X_val, axis=axis, keepdims=True)
np.testing.assert_array_almost_equal(res, exp_res)
def test_multiple_input_multiply(): def test_multiple_input_multiply():
x, y, z = vectors("xyz") x, y, z = vectors("xyz")
out = pt.mul(x, y, z) out = pt.mul(x, y, z)
......
...@@ -4,7 +4,7 @@ import numpy as np ...@@ -4,7 +4,7 @@ import numpy as np
import pytest import pytest
import pytensor.tensor as pt import pytensor.tensor as pt
from pytensor import function, ifelse, shared from pytensor import function, shared
from pytensor.compile import get_mode from pytensor.compile import get_mode
from pytensor.configdefaults import config from pytensor.configdefaults import config
from pytensor.graph import Apply, Op from pytensor.graph import Apply, Op
...@@ -12,8 +12,7 @@ from pytensor.scan import until ...@@ -12,8 +12,7 @@ from pytensor.scan import until
from pytensor.scan.basic import scan from pytensor.scan.basic import scan
from pytensor.scan.op import Scan, ScanInfo from pytensor.scan.op import Scan, ScanInfo
from pytensor.tensor import as_tensor, empty, random from pytensor.tensor import as_tensor, empty, random
from pytensor.tensor.math import gammaln, log from pytensor.tensor.type import dmatrix, dvector, matrix, scalar
from pytensor.tensor.type import dmatrix, dvector, matrix, scalar, vector
from tests.link.jax.test_basic import compare_jax_and_py from tests.link.jax.test_basic import compare_jax_and_py
from tests.scan.test_basic import ScanCompatibilityTests from tests.scan.test_basic import ScanCompatibilityTests
...@@ -393,242 +392,6 @@ def test_dynamic_sequence_length(): ...@@ -393,242 +392,6 @@ def test_dynamic_sequence_length():
np.testing.assert_allclose(f2(np.zeros((0, 3))), np.empty((0, 3))) np.testing.assert_allclose(f2(np.zeros((0, 3))), np.empty((0, 3)))
def SEIR_model_logp():
"""Setup a Scan implementation of a SEIR model.
SEIR model definition:
S[t+1] = S[t] - B[t]
E[t+1] = E[t] +B[t] - C[t]
I[t+1] = I[t+1] + C[t] - D[t]
B[t] ~ Binom(S[t], beta)
C[t] ~ Binom(E[t], gamma)
D[t] ~ Binom(I[t], delta)
"""
def binomln(n, k):
return gammaln(n + 1) - gammaln(k + 1) - gammaln(n - k + 1)
def binom_log_prob(n, p, value):
return binomln(n, value) + value * log(p) + (n - value) * log(1 - p)
# sequences
C_t = vector("C_t", dtype="int32", shape=(1200,))
D_t = vector("D_t", dtype="int32", shape=(1200,))
# outputs_info (initial conditions)
st0 = scalar("s_t0")
et0 = scalar("e_t0")
it0 = scalar("i_t0")
# non_sequences
beta = scalar("beta")
gamma = scalar("gamma")
delta = scalar("delta")
def seir_one_step(ct0, dt0, st0, et0, it0, beta, gamma, delta):
# bt0 = trng.binomial(n=st0, p=beta)
bt0 = st0 * beta
bt0 = bt0.astype(st0.dtype)
logp_c1 = binom_log_prob(et0, gamma, ct0)
logp_d1 = binom_log_prob(it0, delta, dt0)
st1 = st0 - bt0
et1 = et0 + bt0 - ct0
it1 = it0 + ct0 - dt0
return st1, et1, it1, logp_c1, logp_d1
(st, et, it, logp_c_all, logp_d_all) = scan(
fn=seir_one_step,
sequences=[C_t, D_t],
outputs_info=[st0, et0, it0, None, None],
non_sequences=[beta, gamma, delta],
return_updates=False,
)
st.name = "S_t"
et.name = "E_t"
it.name = "I_t"
logp_c_all.name = "C_t_logp"
logp_d_all.name = "D_t_logp"
st0_val, et0_val, it0_val = np.array(100.0), np.array(50.0), np.array(25.0)
beta_val, gamma_val, delta_val = (
np.array(0.277792),
np.array(0.135330),
np.array(0.108753),
)
C_t_val = np.array([3, 5, 8, 13, 21, 26, 10, 3] * 150, dtype=np.int32)
D_t_val = np.array([1, 2, 3, 7, 9, 11, 5, 1] * 150, dtype=np.int32)
assert C_t_val.shape == D_t_val.shape == C_t.type.shape == D_t.type.shape
test_input_vals = [
C_t_val,
D_t_val,
st0_val,
et0_val,
it0_val,
beta_val,
gamma_val,
delta_val,
]
loss_graph = logp_c_all.sum() + logp_d_all.sum()
return dict(
graph_inputs=[C_t, D_t, st0, et0, it0, beta, gamma, delta],
differentiable_vars=[st0, et0, it0, beta, gamma, delta],
test_input_vals=test_input_vals,
loss_graph=loss_graph,
)
def cyclical_reduction():
"""Setup a Scan implementation of the cyclical reduction algorithm.
This solves the matrix equation A @ X @ X + B @ X + C = 0 for X
Adapted from https://github.com/jessegrabowski/gEconpy/blob/da495b22ac383cb6cb5dec15f305506aebef7302/gEconpy/solvers/cycle_reduction.py#L187
"""
def stabilize(x, jitter=1e-16):
return x + jitter * pt.eye(x.shape[0])
def step(A0, A1, A2, A1_hat, norm, step_num, tol):
def cycle_step(A0, A1, A2, A1_hat, _norm, step_num):
tmp = pt.dot(
pt.vertical_stack(A0, A2),
pt.linalg.solve(
stabilize(A1),
pt.horizontal_stack(A0, A2),
assume_a="gen",
check_finite=False,
),
)
n = A0.shape[0]
idx_0 = pt.arange(n)
idx_1 = idx_0 + n
A1 = A1 - tmp[idx_0, :][:, idx_1] - tmp[idx_1, :][:, idx_0]
A0 = -tmp[idx_0, :][:, idx_0]
A2 = -tmp[idx_1, :][:, idx_1]
A1_hat = A1_hat - tmp[idx_1, :][:, idx_0]
A0_L1_norm = pt.linalg.norm(A0, ord=1)
return A0, A1, A2, A1_hat, A0_L1_norm, step_num + 1
return ifelse(
norm < tol,
(A0, A1, A2, A1_hat, norm, step_num),
cycle_step(A0, A1, A2, A1_hat, norm, step_num),
)
A = pt.matrix("A", shape=(20, 20))
B = pt.matrix("B", shape=(20, 20))
C = pt.matrix("C", shape=(20, 20))
norm = np.array(1e9, dtype="float64")
step_num = pt.zeros((), dtype="int32")
max_iter = 100
tol = 1e-7
(*_, A1_hat, norm, _n_steps) = scan(
step,
outputs_info=[A, B, C, B, norm, step_num],
non_sequences=[tol],
n_steps=max_iter,
return_updates=False,
)
A1_hat = A1_hat[-1]
T = -pt.linalg.solve(stabilize(A1_hat), A, assume_a="gen", check_finite=False)
rng = np.random.default_rng(sum(map(ord, "cycle_reduction")))
n = A.type.shape[0]
A_test = rng.standard_normal(size=(n, n))
C_test = rng.standard_normal(size=(n, n))
# B must be invertible, so we make it symmetric positive-definite
B_rand = rng.standard_normal(size=(n, n))
B_test = B_rand @ B_rand.T + np.eye(n) * 1e-3
return dict(
graph_inputs=[A, B, C],
differentiable_vars=[A, B, C],
test_input_vals=[A_test, B_test, C_test],
loss_graph=pt.sum(T),
)
@pytest.mark.parametrize("gradient_backend", ["PYTENSOR", "JAX"])
@pytest.mark.parametrize("mode", ("0forward", "1backward", "2both"))
@pytest.mark.parametrize("model", [cyclical_reduction, SEIR_model_logp])
def test_scan_benchmark(model, mode, gradient_backend, benchmark):
model_dict = model()
graph_inputs = model_dict["graph_inputs"]
differentiable_vars = model_dict["differentiable_vars"]
loss_graph = model_dict["loss_graph"]
test_input_vals = model_dict["test_input_vals"]
if gradient_backend == "PYTENSOR":
backward_loss = pt.grad(
loss_graph,
wrt=differentiable_vars,
)
match mode:
# TODO: Restore original test separately
case "0forward":
graph_outputs = [loss_graph]
case "1backward":
graph_outputs = backward_loss
case "2both":
graph_outputs = [loss_graph, *backward_loss]
case _:
raise ValueError(f"Unknown mode: {mode}")
jax_fn, _ = compare_jax_and_py(
graph_inputs,
graph_outputs,
test_input_vals,
jax_mode="JAX",
)
jax_fn.trust_input = True
else: # gradient_backend == "JAX"
import jax
loss_fn_tuple = function(graph_inputs, loss_graph, mode="JAX").vm.jit_fn
def loss_fn(*args):
return loss_fn_tuple(*args)[0]
match mode:
case "0forward":
jax_fn = jax.jit(loss_fn_tuple)
case "1backward":
jax_fn = jax.jit(
jax.grad(loss_fn, argnums=tuple(range(len(graph_inputs))[2:]))
)
case "2both":
value_and_grad_fn = jax.value_and_grad(
loss_fn, argnums=tuple(range(len(graph_inputs))[2:])
)
@jax.jit
def jax_fn(*args):
loss, grads = value_and_grad_fn(*args)
return loss, *grads
case _:
raise ValueError(f"Unknown mode: {mode}")
def block_until_ready(*inputs, jax_fn=jax_fn):
return [o.block_until_ready() for o in jax_fn(*inputs)]
block_until_ready(*test_input_vals) # Warmup
benchmark.pedantic(block_until_ready, test_input_vals, rounds=200, iterations=1)
def test_higher_order_derivatives(): def test_higher_order_derivatives():
ScanCompatibilityTests.check_higher_order_derivative(mode="JAX") ScanCompatibilityTests.check_higher_order_derivative(mode="JAX")
......
import numpy as np import numpy as np
import pytest import pytest
import scipy
from pytensor import config, function from pytensor import config
from pytensor.tensor.basic import switch from pytensor.tensor.basic import switch
from pytensor.tensor.math import ( from pytensor.tensor.math import (
add, add,
...@@ -14,7 +13,6 @@ from pytensor.tensor.math import ( ...@@ -14,7 +13,6 @@ from pytensor.tensor.math import (
ge, ge,
gt, gt,
int_div, int_div,
isinf,
isnan, isnan,
le, le,
log, log,
...@@ -107,28 +105,6 @@ def test_logsoftmax(axis): ...@@ -107,28 +105,6 @@ def test_logsoftmax(axis):
compare_mlx_and_py([x], [out], [x_test_value]) compare_mlx_and_py([x], [out], [x_test_value])
@pytest.mark.parametrize("size", [(10, 10), (1000, 1000)])
@pytest.mark.parametrize("axis", [0, 1])
def test_logsumexp_benchmark(size, axis, benchmark):
X = matrix("X")
X_max = pt_max(X, axis=axis, keepdims=True)
X_max = switch(isinf(X_max), 0, X_max)
X_lse = log(pt_sum(exp(X - X_max), axis=axis, keepdims=True)) + X_max
rng = np.random.default_rng(23920)
X_val = rng.normal(size=size)
X_lse_fn = function([X], X_lse, mode="MLX")
# JIT compile first
_ = X_lse_fn(X_val)
res = benchmark(X_lse_fn, X_val)
exp_res = scipy.special.logsumexp(X_val, axis=axis, keepdims=True)
np.testing.assert_array_almost_equal(res, exp_res)
def test_multiple_input_multiply(): def test_multiple_input_multiply():
x, y, z = vectors("xyz") x, y, z = vectors("xyz")
out = mul(x, y, z) out = mul(x, y, z)
......
from functools import partial
import numpy as np import numpy as np
import pytest import pytest
from pytensor import function from pytensor.tensor import dmatrix
from pytensor.tensor import dmatrix, tensor
from pytensor.tensor.signal import convolve1d from pytensor.tensor.signal import convolve1d
from tests.link.numba.test_basic import compare_numba_and_py from tests.link.numba.test_basic import compare_numba_and_py
from tests.tensor.signal.test_conv import convolve1d_grad_benchmarker
pytestmark = pytest.mark.filterwarnings( pytestmark = pytest.mark.filterwarnings(
...@@ -43,31 +39,3 @@ def test_convolve1d(mode, bcast_order): ...@@ -43,31 +39,3 @@ def test_convolve1d(mode, bcast_order):
np.swapaxes(numba_fn(test_y, test_x), 0, 1), np.swapaxes(numba_fn(test_y, test_x), 0, 1),
res, res,
) )
@pytest.mark.parametrize("mode", ("full", "valid"), ids=lambda x: f"mode={x}")
@pytest.mark.parametrize("batch", (False, True), ids=lambda x: f"batch={x}")
def test_convolve1d_benchmark_numba(batch, mode, benchmark):
    """Benchmark 1D convolution (optionally batched) on the NUMBA backend.

    The result is validated against a batch-vectorized ``np.convolve``
    before timing.
    """
    x = tensor(shape=(7, 183) if batch else (183,))
    y = tensor(shape=(7, 6) if batch else (6,))
    out = convolve1d(x, y, mode=mode)
    fn = function([x, y], out, mode="NUMBA", trust_input=True)

    rng = np.random.default_rng()
    x_test = rng.normal(size=(x.type.shape)).astype(x.type.dtype)
    y_test = rng.normal(size=(y.type.shape)).astype(y.type.dtype)

    # Reference implementation: np.convolve broadcast over the leading batch dim.
    np_convolve1d = np.vectorize(
        partial(np.convolve, mode=mode), signature="(x),(y)->(z)"
    )

    np.testing.assert_allclose(
        fn(x_test, y_test),
        np_convolve1d(x_test, y_test),
    )
    benchmark(fn, x_test, y_test)
@pytest.mark.parametrize("convolve_mode", ["full", "valid"])
def test_convolve1d_grad_benchmark_numba(convolve_mode, benchmark):
    # Delegates to the shared gradient-benchmark helper, using the NUMBA backend.
    convolve1d_grad_benchmarker(convolve_mode, "NUMBA", benchmark)
...@@ -522,23 +522,6 @@ class TestNumbaWarnings: ...@@ -522,23 +522,6 @@ class TestNumbaWarnings:
np.testing.assert_allclose(fn(A_test, b_test), np.dot(A_test, b_test[:, None])) np.testing.assert_allclose(fn(A_test, b_test), np.dot(A_test, b_test[:, None]))
@pytest.mark.parametrize("mode", ("default", "trust_input", "direct"))
def test_function_overhead(mode, benchmark):
    """Benchmark per-call overhead of a trivial NUMBA-compiled function.

    "default" goes through the full Function call machinery, "trust_input"
    skips input validation, and "direct" calls the underlying jitted
    function with no Python wrapper at all.
    """
    x = pt.vector("x")
    out = pt.exp(x)

    fn = function([x], out, mode="NUMBA")
    if mode == "trust_input":
        fn.trust_input = True
    elif mode == "direct":
        fn = fn.vm.jit_fn

    test_x = np.zeros(1000)
    # exp(0) == 1 elementwise, so the sum equals the vector length.
    assert np.sum(fn(test_x)) == 1000

    benchmark(fn, test_x)
class ComplexType: class ComplexType:
def __init__(self, a, b): def __init__(self, a, b):
self.a = a self.a = a
......
...@@ -9,7 +9,7 @@ from pytensor.tensor.basic import Alloc, ARange, constant ...@@ -9,7 +9,7 @@ from pytensor.tensor.basic import Alloc, ARange, constant
from pytensor.tensor.blockwise import Blockwise, BlockwiseWithCoreShape from pytensor.tensor.blockwise import Blockwise, BlockwiseWithCoreShape
from pytensor.tensor.elemwise import DimShuffle, Elemwise from pytensor.tensor.elemwise import DimShuffle, Elemwise
from pytensor.tensor.nlinalg import SVD, Det from pytensor.tensor.nlinalg import SVD, Det
from pytensor.tensor.slinalg import Cholesky, cholesky from pytensor.tensor.slinalg import Cholesky
from tests.link.numba.test_basic import compare_numba_and_py, numba_mode from tests.link.numba.test_basic import compare_numba_and_py, numba_mode
...@@ -52,17 +52,6 @@ def test_non_square_blockwise(): ...@@ -52,17 +52,6 @@ def test_non_square_blockwise():
fn([3, 4, 5]) fn([3, 4, 5])
def test_blockwise_benchmark(benchmark):
    """Benchmark a batched (Blockwise) Cholesky factorization on NUMBA."""
    x = tensor(shape=(5, 3, 3))
    out = cholesky(x)
    # Sanity check: the batched cholesky must be dispatched through Blockwise.
    assert isinstance(out.owner.op, Blockwise)

    fn = function([x], out, mode="NUMBA")
    # Batch of scaled identity matrices: trivially positive-definite inputs.
    x_test = np.eye(3) * np.arange(1, 6)[:, None, None]
    fn(x_test)  # JIT compile
    benchmark(fn, x_test)
def test_repeated_args(): def test_repeated_args():
x = tensor3("x") x = tensor3("x")
x_test = np.full((1, 1, 1), 2.0, dtype=x.type.dtype) x_test = np.full((1, 1, 1), 2.0, dtype=x.type.dtype)
......
...@@ -2,7 +2,6 @@ import contextlib ...@@ -2,7 +2,6 @@ import contextlib
import numpy as np import numpy as np
import pytest import pytest
import scipy.special
import pytensor import pytensor
import pytensor.tensor as pt import pytensor.tensor as pt
...@@ -13,7 +12,7 @@ from pytensor.compile.ops import deep_copy_op ...@@ -13,7 +12,7 @@ from pytensor.compile.ops import deep_copy_op
from pytensor.gradient import grad from pytensor.gradient import grad
from pytensor.scalar import Composite, float64 from pytensor.scalar import Composite, float64
from pytensor.scalar import add as scalar_add from pytensor.scalar import add as scalar_add
from pytensor.tensor import blas, matrix, tensor, tensor3 from pytensor.tensor import blas, matrix, tensor3
from pytensor.tensor.elemwise import CAReduce, DimShuffle, Elemwise from pytensor.tensor.elemwise import CAReduce, DimShuffle, Elemwise
from pytensor.tensor.math import All, Any, Max, Min, Prod, ProdWithoutZeros, Sum from pytensor.tensor.math import All, Any, Max, Min, Prod, ProdWithoutZeros, Sum
from pytensor.tensor.special import LogSoftmax, Softmax, SoftmaxGrad from pytensor.tensor.special import LogSoftmax, Softmax, SoftmaxGrad
...@@ -22,11 +21,7 @@ from tests.link.numba.test_basic import ( ...@@ -22,11 +21,7 @@ from tests.link.numba.test_basic import (
numba_mode, numba_mode,
scalar_my_multi_out, scalar_my_multi_out,
) )
from tests.tensor.test_elemwise import ( from tests.tensor.test_elemwise import check_elemwise_runtime_broadcast
careduce_benchmark_tester,
check_elemwise_runtime_broadcast,
dimshuffle_benchmark,
)
rng = np.random.default_rng(42849) rng = np.random.default_rng(42849)
...@@ -686,78 +681,6 @@ def test_gammainc_wrt_k_grad(): ...@@ -686,78 +681,6 @@ def test_gammainc_wrt_k_grad():
) )
class TestsBenchmark:
    """Benchmarks for elemwise and reduction graphs on the NUMBA backend."""

    def test_elemwise_speed(self, benchmark):
        # Benchmark a broadcasted elemwise graph, bypassing the Function
        # wrapper by calling the jitted VM function directly.
        x = pt.dmatrix("y")
        y = pt.dvector("z")
        out = np.exp(2 * x * y + y)

        rng = np.random.default_rng(42)
        x_val = rng.normal(size=(200, 500))
        y_val = rng.normal(size=500)

        func = function([x, y], out, mode="NUMBA")
        func = func.vm.jit_fn
        (out,) = func(x_val, y_val)
        np.testing.assert_allclose(np.exp(2 * x_val * y_val + y_val), out)

        benchmark(func, x_val, y_val)

    def test_fused_elemwise_benchmark(self, benchmark):
        # Gaussian logp plus its gradient: exercises elemwise fusion on a
        # shared-variable-only graph (no per-call input handling).
        rng = np.random.default_rng(123)
        size = 100_000
        x = pytensor.shared(rng.normal(size=size), name="x")
        mu = pytensor.shared(rng.normal(size=size), name="mu")

        logp = -((x - mu) ** 2) / 2
        grad_logp = grad(logp.sum(), x)

        func = pytensor.function([], [logp, grad_logp], mode="NUMBA")
        # JIT compile first
        func()
        benchmark(func)

    @pytest.mark.parametrize("size", [(10, 10), (1000, 1000), (10000, 10000)])
    @pytest.mark.parametrize("axis", [0, 1])
    def test_logsumexp_benchmark(self, size, axis, benchmark):
        # Numerically-stable logsumexp, validated against scipy before timing.
        X = pt.matrix("X")
        X_max = pt.max(X, axis=axis, keepdims=True)
        # Replace an infinite max with 0 so X - X_max cannot produce nan.
        X_max = pt.switch(pt.isinf(X_max), 0, X_max)
        X_lse = pt.log(pt.sum(pt.exp(X - X_max), axis=axis, keepdims=True)) + X_max

        rng = np.random.default_rng(23920)
        X_val = rng.normal(size=size)

        X_lse_fn = pytensor.function([X], X_lse, mode="NUMBA")

        # JIT compile first
        res = X_lse_fn(X_val)
        exp_res = scipy.special.logsumexp(X_val, axis=axis, keepdims=True)
        np.testing.assert_array_almost_equal(res, exp_res)
        benchmark(X_lse_fn, X_val)

    @pytest.mark.parametrize(
        "axis",
        (0, 1, 2, (0, 1), (0, 2), (1, 2), None),
        ids=lambda x: f"axis={x}",
    )
    @pytest.mark.parametrize(
        "c_contiguous",
        (True, False),
        ids=lambda x: f"c_contiguous={x}",
    )
    def test_numba_careduce_benchmark(self, axis, c_contiguous, benchmark):
        # Delegates to the shared CAReduce benchmark helper.
        return careduce_benchmark_tester(
            axis, c_contiguous, mode="NUMBA", benchmark=benchmark
        )

    @pytest.mark.parametrize("c_contiguous", (True, False))
    def test_dimshuffle(self, c_contiguous, benchmark):
        # Delegates to the shared DimShuffle benchmark helper.
        dimshuffle_benchmark("NUMBA", c_contiguous, benchmark)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"x, y", "x, y",
[ [
...@@ -855,18 +778,3 @@ def test_BatchedDot(x, y, exc): ...@@ -855,18 +778,3 @@ def test_BatchedDot(x, y, exc):
g, g,
[x_test_value, y_test_value], [x_test_value, y_test_value],
) )
@pytest.mark.parametrize("dtype", ("float64", "float32", "mixed"))
def test_mat_vec_dot_performance(dtype, benchmark):
    """Benchmark matrix-vector dot on NUMBA for same- and mixed-dtype inputs."""
    # "mixed" pairs a float64 matrix with a float32 vector.
    A = tensor("A", shape=(512, 512), dtype="float64" if dtype == "mixed" else dtype)
    x = tensor("x", shape=(512,), dtype="float32" if dtype == "mixed" else dtype)
    out = ptm.dot(A, x)
    fn = function([A, x], out, mode="NUMBA", trust_input=True)

    rng = np.random.default_rng(948)
    A_test = rng.standard_normal(size=A.type.shape, dtype=A.type.dtype)
    x_test = rng.standard_normal(size=x.type.shape, dtype=x.type.dtype)
    # Loose tolerance: float32 inputs accumulate more rounding error.
    np.testing.assert_allclose(fn(A_test, x_test), np.dot(A_test, x_test), atol=1e-4)
    benchmark(fn, A_test, x_test)
...@@ -13,7 +13,6 @@ from pytensor.compile.mode import Mode ...@@ -13,7 +13,6 @@ from pytensor.compile.mode import Mode
from pytensor.graph.rewriting.db import RewriteDatabaseQuery from pytensor.graph.rewriting.db import RewriteDatabaseQuery
from pytensor.link.numba.linker import NumbaLinker from pytensor.link.numba.linker import NumbaLinker
from pytensor.tensor.math import Max from pytensor.tensor.math import Max
from tests.fixtures import * # noqa: F403
opts = RewriteDatabaseQuery(include=[None], exclude=["cxx_only", "BlasOpt"]) opts = RewriteDatabaseQuery(include=[None], exclude=["cxx_only", "BlasOpt"])
...@@ -76,72 +75,3 @@ def test_careduce_performance(careduce_fn, numpy_fn, axis, inputs, input_vals): ...@@ -76,72 +75,3 @@ def test_careduce_performance(careduce_fn, numpy_fn, axis, inputs, input_vals):
# FIXME: Why are we asserting >=? Numba could be doing worse than numpy! # FIXME: Why are we asserting >=? Numba could be doing worse than numpy!
assert mean_numba_time / mean_numpy_time >= 0.75 assert mean_numba_time / mean_numpy_time >= 0.75
@pytest.mark.parametrize("cache", (False, True))
def test_radon_model_compile_repeatedly_numba_benchmark(cache, radon_model, benchmark):
    """Benchmark repeated NUMBA compilation of the same radon model.

    Parametrized over the numba disk cache being enabled or disabled; each
    round recompiles the model from scratch and calls it once.
    """
    joined_inputs, [model_logp, model_dlogp] = radon_model
    rng = np.random.default_rng(1)
    x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)

    def compile_and_call_once():
        # Compilation dominates this timing; the single call confirms it works.
        with config.change_flags(numba__cache=cache):
            fn = function(
                [joined_inputs],
                [model_logp, model_dlogp],
                mode="NUMBA",
                trust_input=True,
            )
            fn(x)

    benchmark.pedantic(compile_and_call_once, rounds=5, iterations=1)
@pytest.mark.parametrize("cache", (False, True))
def test_radon_model_compile_variants_numba_benchmark(
    cache, radon_model, radon_model_variants, benchmark
):
    """Test compilation speed when a slightly variant of a function is compiled each time.

    This test more realistically simulates a use case where a model is recompiled
    multiple times with small changes, such as in an interactive environment.

    NOTE: For this test to be meaningful on subsequent runs, the cache must be cleared
    """
    joined_inputs, [model_logp, model_dlogp] = radon_model
    rng = np.random.default_rng(1)
    x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)

    # Compile base function once to populate the cache
    fn = function(
        [joined_inputs], [model_logp, model_dlogp], mode="NUMBA", trust_input=True
    )
    fn(x)

    def compile_and_call_once():
        # Each variant differs slightly from the cached base model, so the
        # cache can only help with shared sub-computations.
        with config.change_flags(numba__cache=cache):
            for joined_inputs, [model_logp, model_dlogp] in radon_model_variants:
                fn = function(
                    [joined_inputs],
                    [model_logp, model_dlogp],
                    mode="NUMBA",
                    trust_input=True,
                )
                fn(x)

    benchmark.pedantic(compile_and_call_once, rounds=1, iterations=1)
@pytest.mark.parametrize("cache", (False, True))
def test_radon_model_call_numba_benchmark(cache, radon_model, benchmark):
    """Benchmark the per-call cost of the compiled radon model (compiled once)."""
    joined_inputs, [model_logp, model_dlogp] = radon_model
    with config.change_flags(numba__cache=cache):
        fn = function(
            [joined_inputs], [model_logp, model_dlogp], mode="NUMBA", trust_input=True
        )

    rng = np.random.default_rng(1)
    x = rng.normal(size=joined_inputs.type.shape).astype(config.floatX)
    fn(x)  # warmup
    benchmark.pedantic(fn, (x,), rounds=10_000, iterations=10)
...@@ -160,7 +160,7 @@ def test_xit_xot_types( ...@@ -160,7 +160,7 @@ def test_xit_xot_types(
assert np.allclose(res_val, output_vals) assert np.allclose(res_val, output_vals)
def test_scan_multiple_output(benchmark): def test_scan_multiple_output():
"""Test a scan implementation of a SEIR model. """Test a scan implementation of a SEIR model.
SEIR model definition: SEIR model definition:
...@@ -243,14 +243,12 @@ def test_scan_multiple_output(benchmark): ...@@ -243,14 +243,12 @@ def test_scan_multiple_output(benchmark):
gamma_val, gamma_val,
delta_val, delta_val,
] ]
scan_fn, _ = compare_numba_and_py( compare_numba_and_py(
[pt_C, pt_D, st0, et0, it0, logp_c, logp_d, beta, gamma, delta], [pt_C, pt_D, st0, et0, it0, logp_c, logp_d, beta, gamma, delta],
out, out,
test_input_vals, test_input_vals,
) )
benchmark(scan_fn, *test_input_vals)
def test_scan_tap_output(): def test_scan_tap_output():
a_pt = pt.scalar("a") a_pt = pt.scalar("a")
...@@ -415,8 +413,8 @@ def test_inner_graph_optimized(): ...@@ -415,8 +413,8 @@ def test_inner_graph_optimized():
) )
def test_vector_taps_benchmark(benchmark): def test_vector_taps():
"""Test vector taps performance. """Test vector taps.
Vector taps get indexed into numeric types, that must be wrapped back into Vector taps get indexed into numeric types, that must be wrapped back into
scalar arrays. The numba Scan implementation has an optimization to reuse scalar arrays. The numba Scan implementation has an optimization to reuse
...@@ -464,8 +462,6 @@ def test_vector_taps_benchmark(benchmark): ...@@ -464,8 +462,6 @@ def test_vector_taps_benchmark(benchmark):
for numba_r, ref_r in zip(numba_res, ref_res, strict=True): for numba_r, ref_r in zip(numba_res, ref_res, strict=True):
np.testing.assert_array_almost_equal(numba_r, ref_r) np.testing.assert_array_almost_equal(numba_r, ref_r)
benchmark(numba_fn, *test.values())
@pytest.mark.parametrize("n_steps_constant", (True, False)) @pytest.mark.parametrize("n_steps_constant", (True, False))
def test_inplace_taps(n_steps_constant): def test_inplace_taps(n_steps_constant):
...@@ -542,10 +538,10 @@ def test_inplace_taps(n_steps_constant): ...@@ -542,10 +538,10 @@ def test_inplace_taps(n_steps_constant):
) )
@pytest.mark.parametrize("n_steps, op_size", [(10, 2), (512, 2), (512, 256)]) @pytest.mark.parametrize("n_steps, op_size", [(10, 2), (512, 2), (512, 256)])
class TestScanSITSOTBuffer: class TestScanSITSOTBuffer:
def buffer_tester(self, n_steps, op_size, buffer_size, benchmark=None): def buffer_tester(self, n_steps, op_size, buffer_size):
x0 = pt.vector(shape=(op_size,), dtype="float64") x0 = pt.vector(shape=(op_size,), dtype="float64")
xs = pytensor.scan( xs = pytensor.scan(
fn=lambda xtm1: (xtm1 + 1), fn=lambda xtm1: xtm1 + 1,
outputs_info=[x0], outputs_info=[x0],
n_steps=n_steps - 1, # 1- makes it easier to align/misalign n_steps=n_steps - 1, # 1- makes it easier to align/misalign
return_updates=False, return_updates=False,
...@@ -582,21 +578,14 @@ class TestScanSITSOTBuffer: ...@@ -582,21 +578,14 @@ class TestScanSITSOTBuffer:
buffer = scan_node.inputs[1] buffer = scan_node.inputs[1]
assert buffer.type.shape[0] == expected_buffer_size assert buffer.type.shape[0] == expected_buffer_size
if benchmark is not None:
numba_fn.trust_input = True
benchmark(numba_fn, x_test)
def test_sit_sot_buffer(self, n_steps, op_size, buffer_size): def test_sit_sot_buffer(self, n_steps, op_size, buffer_size):
self.buffer_tester(n_steps, op_size, buffer_size, benchmark=None) self.buffer_tester(n_steps, op_size, buffer_size)
def test_sit_sot_buffer_benchmark(self, n_steps, op_size, buffer_size, benchmark):
self.buffer_tester(n_steps, op_size, buffer_size, benchmark=benchmark)
@pytest.mark.parametrize("constant_n_steps", [False, True]) @pytest.mark.parametrize("constant_n_steps", [False, True])
@pytest.mark.parametrize("n_steps_val", [1, 1000]) @pytest.mark.parametrize("n_steps_val", [1, 1000])
class TestScanMITSOTBuffer: class TestScanMITSOTBuffer:
def buffer_tester(self, constant_n_steps, n_steps_val, benchmark=None): def buffer_tester(self, constant_n_steps, n_steps_val):
"""Make sure we can handle storage changes caused by the `scan_save_mem` rewrite.""" """Make sure we can handle storage changes caused by the `scan_save_mem` rewrite."""
def f_pow2(x_tm2, x_tm1): def f_pow2(x_tm2, x_tm1):
...@@ -644,15 +633,9 @@ class TestScanMITSOTBuffer: ...@@ -644,15 +633,9 @@ class TestScanMITSOTBuffer:
on_unused_input="ignore", on_unused_input="ignore",
) )
assert tuple(mitsot_buffer_shape) == (2,) assert tuple(mitsot_buffer_shape) == (2,)
if benchmark is not None:
numba_fn.trust_input = True
benchmark(numba_fn, *test_vals)
def test_mit_sot_buffer(self, constant_n_steps, n_steps_val): def test_mit_sot_buffer(self, constant_n_steps, n_steps_val):
self.buffer_tester(constant_n_steps, n_steps_val, benchmark=None) self.buffer_tester(constant_n_steps, n_steps_val)
def test_mit_sot_buffer_benchmark(self, constant_n_steps, n_steps_val, benchmark):
self.buffer_tester(constant_n_steps, n_steps_val, benchmark=benchmark)
def test_higher_order_derivatives(): def test_higher_order_derivatives():
......
...@@ -52,8 +52,6 @@ from pytensor.tensor.subtensor import Subtensor ...@@ -52,8 +52,6 @@ from pytensor.tensor.subtensor import Subtensor
from pytensor.tensor.type import ( from pytensor.tensor.type import (
TensorType, TensorType,
dcol, dcol,
dmatrix,
dscalar,
dvector, dvector,
fmatrix, fmatrix,
fscalar, fscalar,
...@@ -2357,7 +2355,7 @@ def test_cvm_exception_handling(mode): ...@@ -2357,7 +2355,7 @@ def test_cvm_exception_handling(mode):
@pytest.mark.skipif( @pytest.mark.skipif(
not config.cxx, reason="G++ not available, so we need to skip this test." not config.cxx, reason="G++ not available, so we need to skip this test."
) )
def test_cython_performance(benchmark): def test_cython_performance():
# This implicitly confirms that the Cython version is being used # This implicitly confirms that the Cython version is being used
from pytensor.scan import scan_perform_ext # noqa: F401 from pytensor.scan import scan_perform_ext # noqa: F401
...@@ -2391,7 +2389,7 @@ def test_cython_performance(benchmark): ...@@ -2391,7 +2389,7 @@ def test_cython_performance(benchmark):
# Make sure we're actually computing a `Scan` # Make sure we're actually computing a `Scan`
assert any(isinstance(node.op, Scan) for node in f_cvm.maker.fgraph.apply_nodes) assert any(isinstance(node.op, Scan) for node in f_cvm.maker.fgraph.apply_nodes)
cvm_res = benchmark(f_cvm) cvm_res = f_cvm()
# Make sure the results are the same between the two implementations # Make sure the results are the same between the two implementations
assert np.allclose(cvm_res, py_res) assert np.allclose(cvm_res, py_res)
...@@ -2741,7 +2739,7 @@ class TestExamples: ...@@ -2741,7 +2739,7 @@ class TestExamples:
n_result = numpy_implementation(v_vsample) n_result = numpy_implementation(v_vsample)
utt.assert_allclose(t_result, n_result) utt.assert_allclose(t_result, n_result)
def test_reordering(self, benchmark): def test_reordering(self):
"""Test re-ordering of inputs. """Test re-ordering of inputs.
some rnn with multiple outputs and multiple inputs; other some rnn with multiple outputs and multiple inputs; other
...@@ -2800,38 +2798,13 @@ class TestExamples: ...@@ -2800,38 +2798,13 @@ class TestExamples:
v_x[i] = np.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 + np.dot(v_x[i - 1], vW) v_x[i] = np.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 + np.dot(v_x[i - 1], vW)
v_y[i] = np.dot(v_x[i - 1], vWout) + v_y[i - 1] v_y[i] = np.dot(v_x[i - 1], vWout) + v_y[i - 1]
(_pytensor_dump1, _pytensor_dump2, pytensor_x, pytensor_y) = benchmark( (_pytensor_dump1, _pytensor_dump2, pytensor_x, pytensor_y) = f4(
f4, v_u1, v_u2, v_x0, v_y0, vW_in1 v_u1, v_u2, v_x0, v_y0, vW_in1
) )
utt.assert_allclose(pytensor_x, v_x) utt.assert_allclose(pytensor_x, v_x)
utt.assert_allclose(pytensor_y, v_y) utt.assert_allclose(pytensor_y, v_y)
    def test_scan_as_tensor_on_gradients(self, benchmark):
        """Benchmark compiling the gradient of a simple scan graph."""
        to_scan = dvector("to_scan")
        seq = dmatrix("seq")
        f1 = dscalar("f1")

        def scanStep(prev, seq, f1):
            # prev: carried state; seq: current sequence row; f1: scale factor.
            return prev + f1 * seq

        scanned = scan(
            fn=scanStep,
            sequences=[seq],
            outputs_info=[to_scan],
            non_sequences=[f1],
            return_updates=False,
        )
        # Compile the forward function once (result unused; exercises compilation).
        function(inputs=[to_scan, seq, f1], outputs=scanned, allow_input_downcast=True)

        t_grad = grad(scanned.sum(), wrt=[to_scan, f1], consider_constant=[seq])
        # The benchmark measures the compile time of the gradient function itself.
        benchmark(
            function,
            inputs=[to_scan, seq, f1],
            outputs=t_grad,
            allow_input_downcast=True,
        )
def caching_nsteps_by_scan_op(self): def caching_nsteps_by_scan_op(self):
W = matrix("weights") W = matrix("weights")
initial = vector("initial") initial = vector("initial")
...@@ -3128,7 +3101,7 @@ class TestExamples: ...@@ -3128,7 +3101,7 @@ class TestExamples:
utt.assert_allclose(outputs, expected_outputs) utt.assert_allclose(outputs, expected_outputs)
@pytest.mark.slow @pytest.mark.slow
def test_hessian_bug_grad_grad_two_scans(self, benchmark): def test_hessian_bug_grad_grad_two_scans(self):
# Bug reported by Bitton Tenessi # Bug reported by Bitton Tenessi
# NOTE : The test to reproduce the bug reported by Bitton Tenessi # NOTE : The test to reproduce the bug reported by Bitton Tenessi
# was modified from its original version to be faster to run. # was modified from its original version to be faster to run.
...@@ -3163,7 +3136,7 @@ class TestExamples: ...@@ -3163,7 +3136,7 @@ class TestExamples:
H = hessian(cost, W) H = hessian(cost, W)
# print(".", file=sys.stderr) # print(".", file=sys.stderr)
f = function([W, n_steps], H) f = function([W, n_steps], H)
benchmark(f, np.ones((8,), dtype="float32"), 1) f(np.ones((8,), dtype="float32"), 1)
def test_grad_connectivity_matrix(self): def test_grad_connectivity_matrix(self):
def inner_fn(x_tm1, y_tm1, z_tm1): def inner_fn(x_tm1, y_tm1, z_tm1):
...@@ -3747,7 +3720,7 @@ class TestExamples: ...@@ -3747,7 +3720,7 @@ class TestExamples:
utt.assert_allclose(pytensor_x, v_x) utt.assert_allclose(pytensor_x, v_x)
utt.assert_allclose(pytensor_y, v_y) utt.assert_allclose(pytensor_y, v_y)
def test_multiple_outs_taps(self, benchmark): def test_multiple_outs_taps(self):
l = 5 l = 5
rng = np.random.default_rng(utt.fetch_seed()) rng = np.random.default_rng(utt.fetch_seed())
...@@ -3841,8 +3814,6 @@ class TestExamples: ...@@ -3841,8 +3814,6 @@ class TestExamples:
np.testing.assert_almost_equal(res[1], ny1) np.testing.assert_almost_equal(res[1], ny1)
np.testing.assert_almost_equal(res[2], ny2) np.testing.assert_almost_equal(res[2], ny2)
benchmark(f, v_u1, v_u2, v_x0, v_y0, vW_in1)
def _grad_mout_helper(self, n_iters, mode): def _grad_mout_helper(self, n_iters, mode):
rng = np.random.default_rng(utt.fetch_seed()) rng = np.random.default_rng(utt.fetch_seed())
n_hid = 3 n_hid = 3
......
...@@ -674,28 +674,6 @@ class TestPushOutAddScan: ...@@ -674,28 +674,6 @@ class TestPushOutAddScan:
vB = rng.uniform(size=(5, 5)).astype(config.floatX) vB = rng.uniform(size=(5, 5)).astype(config.floatX)
utt.assert_allclose(f(vA, vB), np.dot(vA.T, vB)) utt.assert_allclose(f(vA, vB), np.dot(vA.T, vB))
    def test_pregreedy_optimizer(self, benchmark):
        """Benchmark an RBM-chain-like graph built from two nested scans."""
        # Constant weights and biases fed through the scans as closed-over values.
        W = pt.zeros((5, 4))
        bv = pt.zeros((5,))
        bh = pt.zeros((4,))
        v = matrix("v")
        # First scan broadcasts the biases over the rows of v.
        (bv_t, bh_t) = scan(
            lambda _: [bv, bh],
            sequences=v,
            outputs_info=[None, None],
            return_updates=False,
        )
        # Second scan iterates a two-step chain using the broadcast biases.
        chain = scan(
            lambda x: dot(dot(x, W) + bh_t, W.T) + bv_t,
            outputs_info=v,
            n_steps=2,
            return_updates=False,
        )
        # TODO FIXME: Make this a real test and assert something.
        chain_fn = function([v], chain)

        benchmark(chain_fn, np.zeros((3, 5), dtype=config.floatX))
def test_machine_translation(self): def test_machine_translation(self):
""" """
This test case comes from https://github.com/rizar/scan-grad-speed and This test case comes from https://github.com/rizar/scan-grad-speed and
...@@ -1560,18 +1538,6 @@ class TestSaveMem: ...@@ -1560,18 +1538,6 @@ class TestSaveMem:
] ]
assert len(scan_nodes) == 1 assert len(scan_nodes) == 1
    def test_savemem_opt(self, benchmark):
        """Benchmark a scan graph eligible for the save_mem buffer rewrite."""
        # taps=[-2] forces a multi-step initial buffer for the first output.
        y0 = shared(np.ones((2, 10)))
        [_y1, y2] = scan(
            lambda y: [y, y],
            outputs_info=[dict(initial=y0, taps=[-2]), None],
            n_steps=5,
            return_updates=False,
        )
        # TODO FIXME: Make this a real test and assert something.
        fn = function([], y2.sum(), mode=self.mode)
        benchmark(fn)
def test_savemem_opt_0_step(self): def test_savemem_opt_0_step(self):
""" """
Test a case where the savemem optimization has the opportunity to Test a case where the savemem optimization has the opportunity to
......
...@@ -11,7 +11,6 @@ from pytensor.compile.function import function ...@@ -11,7 +11,6 @@ from pytensor.compile.function import function
from pytensor.compile.function.types import add_supervisor_to_fgraph from pytensor.compile.function.types import add_supervisor_to_fgraph
from pytensor.compile.mode import Mode, get_default_mode from pytensor.compile.mode import Mode, get_default_mode
from pytensor.configdefaults import config from pytensor.configdefaults import config
from pytensor.gradient import grad
from pytensor.graph.basic import Constant, equal_computations from pytensor.graph.basic import Constant, equal_computations
from pytensor.graph.fg import FunctionGraph from pytensor.graph.fg import FunctionGraph
from pytensor.graph.rewriting.basic import check_stack_trace, out2in from pytensor.graph.rewriting.basic import check_stack_trace, out2in
...@@ -1351,40 +1350,6 @@ class TestFusion: ...@@ -1351,40 +1350,6 @@ class TestFusion:
assert len(nodes) == 1 assert len(nodes) == 1
assert isinstance(nodes[0].op.scalar_op, Composite) assert isinstance(nodes[0].op.scalar_op, Composite)
    def test_eval_benchmark(self, benchmark):
        """Benchmark runtime of a fused Gaussian logp + gradient on FAST_RUN."""
        rng = np.random.default_rng(123)
        size = 100_000
        x = pytensor.shared(rng.normal(size=size), name="x")
        mu = pytensor.shared(rng.normal(size=size), name="mu")

        logp = -((x - mu) ** 2) / 2
        grad_logp = grad(logp.sum(), x)

        # Shared-variable-only graph: each benchmark call is pure computation.
        func = pytensor.function([], [logp, grad_logp], mode="FAST_RUN")
        benchmark(func)
    @pytest.mark.skipif(not config.cxx, reason="No cxx compiler")
    @pytest.mark.parametrize(
        "graph_fn, n, expected_n_repl",
        [
            ("deep_small_kernels", 20, (20, 60)),
            ("large_fuseable_graph", 25, (128, 876)),
        ],
    )
    def test_rewrite_benchmark(self, graph_fn, n, expected_n_repl, benchmark):
        """Benchmark the FusionOptimizer rewrite pass itself (not execution)."""
        # graph_fn names a sibling method that builds the test graph.
        inps, outs = getattr(self, graph_fn)(n)
        fg = FunctionGraph(inps, outs)
        opt = FusionOptimizer()

        def rewrite_func():
            # Clone so each round rewrites a fresh, unrewritten graph.
            fg_clone = fg.clone()
            _, nb_fused, nb_replacement, *_ = opt.apply(fg_clone)
            # fg_clone.dprint()
            return nb_fused, nb_replacement

        # Confirm the rewrite does the expected amount of work before timing it.
        assert rewrite_func() == expected_n_repl
        benchmark.pedantic(rewrite_func, rounds=7, iterations=5)
def test_no_warning_from_old_client(self): def test_no_warning_from_old_client(self):
# There used to be a warning issued when creating fuseable mapping # There used to be a warning issued when creating fuseable mapping
# for nodes that are no longer in the FunctionGraph # for nodes that are no longer in the FunctionGraph
......
...@@ -5006,37 +5006,3 @@ class TestBlockDiagDotToDotBlockDiag: ...@@ -5006,37 +5006,3 @@ class TestBlockDiagDotToDotBlockDiag:
original, include=("canonicalize", "stabilize", "specialize") original, include=("canonicalize", "stabilize", "specialize")
) )
assert_equal_computations([rewritten], [original]) assert_equal_computations([rewritten], [original])
    @pytest.mark.parametrize("rewrite", [True, False], ids=["rewrite", "no_rewrite"])
    @pytest.mark.parametrize("size", [10, 100, 1000], ids=["small", "medium", "large"])
    def test_benchmark(self, benchmark, size, rewrite):
        """Benchmark ``block_diag(a, b, c) @ d`` with and without the
        local_block_diag_dot_to_dot_block_diag rewrite."""
        rng = np.random.default_rng()
        # Split `size` into three random block sizes that sum to `size`.
        a_size = int(rng.uniform(1, int(0.8 * size)))
        b_size = int(rng.uniform(1, int(0.8 * (size - a_size))))
        c_size = size - a_size - b_size
        a = tensor("a", shape=(a_size, a_size))
        b = tensor("b", shape=(b_size, b_size))
        c = tensor("c", shape=(c_size, c_size))
        d = tensor("d", shape=(size,))

        x = pt.linalg.block_diag(a, b, c)
        out = x @ d

        mode = get_default_mode()
        if not rewrite:
            mode = mode.excluding("local_block_diag_dot_to_dot_block_diag")
        fn = pytensor.function([a, b, c, d], out, mode=mode)

        a_val = rng.normal(size=a.type.shape).astype(a.type.dtype)
        b_val = rng.normal(size=b.type.shape).astype(b.type.dtype)
        c_val = rng.normal(size=c.type.shape).astype(c.type.dtype)
        d_val = rng.normal(size=d.type.shape).astype(d.type.dtype)
        benchmark(
            fn,
            a_val,
            b_val,
            c_val,
            d_val,
        )
...@@ -8,7 +8,7 @@ from scipy.signal import convolve2d as scipy_convolve2d ...@@ -8,7 +8,7 @@ from scipy.signal import convolve2d as scipy_convolve2d
from pytensor import config, function, grad from pytensor import config, function, grad
from pytensor.graph.rewriting import rewrite_graph from pytensor.graph.rewriting import rewrite_graph
from pytensor.graph.traversal import ancestors, io_toposort from pytensor.graph.traversal import ancestors, io_toposort
from pytensor.tensor import matrix, tensor, vector from pytensor.tensor import matrix, vector
from pytensor.tensor.basic import expand_dims from pytensor.tensor.basic import expand_dims
from pytensor.tensor.blockwise import Blockwise from pytensor.tensor.blockwise import Blockwise
from pytensor.tensor.signal.conv import Convolve1d, convolve1d, convolve2d from pytensor.tensor.signal.conv import Convolve1d, convolve1d, convolve2d
...@@ -122,27 +122,6 @@ def test_convolve1d_valid_grad(static_shape): ...@@ -122,27 +122,6 @@ def test_convolve1d_valid_grad(static_shape):
assert full_mode.eval({larger: smaller_test, smaller: larger_test}) == True # noqa: E712 assert full_mode.eval({larger: smaller_test, smaller: larger_test}) == True # noqa: E712
def convolve1d_grad_benchmarker(convolve_mode, mode, benchmark):
    """Shared helper: benchmark the gradient of convolve1d wrt the smaller input.

    Parameters
    ----------
    convolve_mode : {"full", "valid"}
        Convolution mode of the forward graph.
    mode : str
        PyTensor compilation mode (e.g. "NUMBA" or "FAST_RUN").
    benchmark
        pytest-benchmark fixture.
    """
    # Use None core shape so PyTensor doesn't know which mode to use until runtime.
    larger = tensor("larger", shape=(8, None))
    smaller = tensor("smaller", shape=(8, None))
    grad_wrt_smaller = grad(
        convolve1d(larger, smaller, mode=convolve_mode).sum(), wrt=smaller
    )
    fn = function([larger, smaller], grad_wrt_smaller, trust_input=True, mode=mode)

    # Seed varies with the convolution mode. The original compared the backend
    # `mode` (never "full") against "full", which was always False, so both
    # convolve modes silently shared one seed.
    rng = np.random.default_rng([119, convolve_mode == "full"])
    test_larger = rng.normal(size=(8, 1024)).astype(larger.type.dtype)
    test_smaller = rng.normal(size=(8, 16)).astype(smaller.type.dtype)
    benchmark(fn, test_larger, test_smaller)
@pytest.mark.parametrize("convolve_mode", ["full", "valid"])
def test_convolve1d_grad_benchmark_c(convolve_mode, benchmark):
    # Same gradient benchmark as the numba variant, on the default C backend.
    convolve1d_grad_benchmarker(convolve_mode, "FAST_RUN", benchmark)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"kernel_shape", [(3, 3), (5, 3), (5, 8)], ids=lambda x: f"kernel_shape={x}" "kernel_shape", [(3, 3), (5, 3), (5, 8)], ids=lambda x: f"kernel_shape={x}"
) )
......
...@@ -118,7 +118,6 @@ from pytensor.tensor.type import ( ...@@ -118,7 +118,6 @@ from pytensor.tensor.type import (
ivector, ivector,
lscalar, lscalar,
lvector, lvector,
matrices,
matrix, matrix,
row, row,
scalar, scalar,
...@@ -2106,32 +2105,6 @@ class TestJoinAndSplit: ...@@ -2106,32 +2105,6 @@ class TestJoinAndSplit:
assert np.allclose(r, expected) assert np.allclose(r, expected)
assert r.base is x_test assert r.base is x_test
@pytest.mark.parametrize("gc", (True, False), ids=lambda x: f"gc={x}")
@pytest.mark.parametrize("memory_layout", ["C-contiguous", "F-contiguous", "Mixed"])
@pytest.mark.parametrize("axis", (0, 1), ids=lambda x: f"axis={x}")
@pytest.mark.parametrize("ndim", (1, 2), ids=["vector", "matrix"])
@config.change_flags(cmodule__warn_no_version=False)
def test_join_performance(self, ndim, axis, memory_layout, gc, benchmark):
if ndim == 1 and not (memory_layout == "C-contiguous" and axis == 0):
pytest.skip("Redundant parametrization")
n = 64
inputs = vectors("abcdef") if ndim == 1 else matrices("abcdef")
out = join(axis, *inputs)
fn = pytensor.function(inputs, Out(out, borrow=True), trust_input=True)
fn.vm.allow_gc = gc
test_values = [np.zeros((n, n)[:ndim], dtype=inputs[0].dtype) for _ in inputs]
if memory_layout == "C-contiguous":
pass
elif memory_layout == "F-contiguous":
test_values = [t.T for t in test_values]
elif memory_layout == "Mixed":
test_values = [t if i % 2 else t.T for i, t in enumerate(test_values)]
else:
raise ValueError
assert fn(*test_values).shape == (n * 6, n)[:ndim] if axis == 0 else (n, n * 6)
benchmark(fn, *test_values)
def test_join_negative_axis_rewrite(self): def test_join_negative_axis_rewrite(self):
"""Test that constant negative axis is rewritten to positive axis in make_node.""" """Test that constant negative axis is rewritten to positive axis in make_node."""
v = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=self.floatX) v = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=self.floatX)
......
...@@ -443,94 +443,3 @@ class TestSdotNoFlags(TestCGemvNoFlags): ...@@ -443,94 +443,3 @@ class TestSdotNoFlags(TestCGemvNoFlags):
class TestBlasStridesC(TestBlasStrides): class TestBlasStridesC(TestBlasStrides):
mode = mode_blas_opt mode = mode_blas_opt
def test_gemv_vector_dot_perf(benchmark):
    """Benchmark a vector-vector dot product expressed as an inplace CGemv."""
    size = 400_000
    lhs = pt.vector("A", shape=(size,))
    rhs = pt.vector("x", shape=(size,))
    # A (1 x N) @ (N,) gemv is equivalent to a dot product of two vectors.
    gemv_out = CGemv(inplace=True)(pt.empty((1,)), 1.0, lhs[None], rhs, 0.0)
    compiled = pytensor.function(
        [lhs, rhs], gemv_out, accept_inplace=True, trust_input=True
    )

    rng = np.random.default_rng(430)
    lhs_val = rng.normal(size=size)
    rhs_val = rng.normal(size=size)
    # Sanity-check against NumPy before timing.
    np.testing.assert_allclose(compiled(lhs_val, rhs_val), np.dot(lhs_val, rhs_val))
    benchmark(compiled, lhs_val, rhs_val)
@pytest.mark.parametrize(
    "neg_stride1", (True, False), ids=["neg_stride1", "pos_stride1"]
)
@pytest.mark.parametrize(
    "neg_stride0", (True, False), ids=["neg_stride0", "pos_stride0"]
)
@pytest.mark.parametrize("F_layout", (True, False), ids=["F_layout", "C_layout"])
def test_gemv_negative_strides_perf(neg_stride0, neg_stride1, F_layout, benchmark):
    """Benchmark CGemv when the matrix operand has F-order and/or negative strides."""
    A = pt.matrix("A", shape=(512, 512))
    x = pt.vector("x", shape=(A.type.shape[-1],))
    y = pt.vector("y", shape=(A.type.shape[0],))
    fn = pytensor.function(
        [A, x, y], CGemv(inplace=False)(y, 1.0, A, x, 1.0), trust_input=True
    )

    rng = np.random.default_rng(430)
    A_val = rng.normal(size=A.type.shape)
    x_val = rng.normal(size=x.type.shape)
    y_val = rng.normal(size=y.type.shape)

    # Build the requested stride pattern via views (no copies).
    if F_layout:
        A_val = A_val.T
    if neg_stride0:
        A_val = A_val[::-1]
    if neg_stride1:
        A_val = A_val[:, ::-1]
    assert (A_val.strides[0] < 0) == neg_stride0
    assert (A_val.strides[1] < 0) == neg_stride1

    # Check result is correct by using a copy of A with positive strides
    res = fn(A_val, x_val, y_val)
    np.testing.assert_allclose(res, fn(A_val.copy(), x_val, y_val))

    benchmark(fn, A_val, x_val, y_val)
@pytest.mark.parametrize("inplace", (True, False), ids=["inplace", "no_inplace"])
@pytest.mark.parametrize("n", [2**7, 2**9, 2**13])
def test_ger_benchmark(n, inplace, benchmark):
    """Benchmark the rank-1 update ``A + alpha * outer(x, y)`` (GER)."""
    alpha = pt.dscalar("alpha")
    x = pt.dvector("x")
    y = pt.dvector("y")
    A = pt.dmatrix("A")

    fn = pytensor.function(
        [alpha, x, y, pytensor.In(A, mutable=inplace)],
        alpha * pt.outer(x, y) + A,
        trust_input=True,
    )

    # Seed depends on n so each size gets distinct (but reproducible) data.
    rng = np.random.default_rng([2274, n])
    test_inputs = [
        rng.normal(size=()),
        rng.normal(size=(n,)),
        rng.normal(size=(n,)),
        rng.normal(size=(n, n)),
    ]
    benchmark(fn, *test_inputs)
...@@ -15,9 +15,6 @@ from pytensor.graph.replace import _vectorize_node, vectorize_graph ...@@ -15,9 +15,6 @@ from pytensor.graph.replace import _vectorize_node, vectorize_graph
from pytensor.link.numba import NumbaLinker from pytensor.link.numba import NumbaLinker
from pytensor.raise_op import assert_op from pytensor.raise_op import assert_op
from pytensor.tensor import ( from pytensor.tensor import (
diagonal,
dmatrix,
log,
matrices, matrices,
matrix, matrix,
ones_like, ones_like,
...@@ -34,7 +31,6 @@ from pytensor.tensor.nlinalg import MatrixInverse, eig ...@@ -34,7 +31,6 @@ from pytensor.tensor.nlinalg import MatrixInverse, eig
from pytensor.tensor.random import normal from pytensor.tensor.random import normal
from pytensor.tensor.random.op import default_rng from pytensor.tensor.random.op import default_rng
from pytensor.tensor.rewriting.blas import specialize_matmul_to_batched_dot from pytensor.tensor.rewriting.blas import specialize_matmul_to_batched_dot
from pytensor.tensor.signal import convolve1d
from pytensor.tensor.slinalg import ( from pytensor.tensor.slinalg import (
Cholesky, Cholesky,
Solve, Solve,
...@@ -530,66 +526,6 @@ class TestSolveMatrix(BlockwiseOpTester): ...@@ -530,66 +526,6 @@ class TestSolveMatrix(BlockwiseOpTester):
signature = "(m, m),(m, n) -> (m, n)" signature = "(m, m),(m, n) -> (m, n)"
@pytest.mark.parametrize(
    "mu_batch_shape", [(), (1000,), (4, 1000)], ids=lambda arg: f"mu:{arg}"
)
@pytest.mark.parametrize(
    "cov_batch_shape", [(), (1000,), (4, 1000)], ids=lambda arg: f"cov:{arg}"
)
def test_batched_mvnormal_logp_and_dlogp(mu_batch_shape, cov_batch_shape, benchmark):
    """Benchmark the logp and gradients of a batched multivariate normal."""
    rng = np.random.default_rng(sum(map(ord, "batched_mvnormal")))
    # The value broadcasts against both mu and cov: use the larger batch shape.
    value_batch_shape = (
        cov_batch_shape
        if len(cov_batch_shape) > len(mu_batch_shape)
        else mu_batch_shape
    )

    value = tensor("value", shape=(*value_batch_shape, 10))
    mu = tensor("mu", shape=(*mu_batch_shape, 10))
    cov = tensor("cov", shape=(*cov_batch_shape, 10, 10))
    test_values = [
        rng.normal(size=value.type.shape),
        rng.normal(size=mu.type.shape),
        # Diagonal covariances, guaranteed positive-definite.
        np.eye(cov.type.shape[-1]) * np.abs(rng.normal(size=cov.type.shape)),
    ]

    # logp of MvNormal via the Cholesky factor of cov.
    chol = cholesky(cov, lower=True, on_error="raise")
    whitened = solve_triangular(chol, value - mu, b_ndim=1)
    quaddist = (whitened**2).sum(axis=-1)
    chol_diag = diagonal(chol, axis1=-2, axis2=-1)
    logdet = log(chol_diag).sum(axis=-1)
    k = value.shape[-1]
    norm_const = -0.5 * k * (np.log(2 * np.pi))
    logp = norm_const - 0.5 * quaddist - logdet
    dlogp = grad(logp.sum(), wrt=[value, mu, cov])

    fn = pytensor.function([value, mu, cov], [logp, *dlogp])
    benchmark(fn, *test_values)
def test_small_blockwise_performance(benchmark):
    """Benchmark a batched 1d convolution that compiles to a small Blockwise op."""
    a = dmatrix(shape=(7, 128))
    b = dmatrix(shape=(7, 20))
    fn = pytensor.function([a, b], convolve1d(a, b, mode="valid"), trust_input=True)
    # The rewrite pipeline must have kept the op as a Blockwise.
    assert isinstance(
        fn.maker.fgraph.outputs[0].owner.op, Blockwise | BlockwiseWithCoreShape
    )

    rng = np.random.default_rng(495)
    a_val = rng.normal(size=a.type.shape)
    b_val = rng.normal(size=b.type.shape)
    expected = [
        np.convolve(a_val[i], b_val[i], mode="valid") for i in range(a_val.shape[0])
    ]
    np.testing.assert_allclose(fn(a_val, b_val), expected)
    benchmark(fn, a_val, b_val)
def test_cop_with_params(): def test_cop_with_params():
matrix_assert = Blockwise(core_op=assert_op, signature="(x1,x2),()->(x1,x2)") matrix_assert = Blockwise(core_op=assert_op, signature="(x1,x2),()->(x1,x2)")
......
import itertools
import math import math
import re import re
import tracemalloc import tracemalloc
...@@ -11,7 +10,7 @@ import pytensor ...@@ -11,7 +10,7 @@ import pytensor
import pytensor.scalar as ps import pytensor.scalar as ps
import pytensor.tensor as pt import pytensor.tensor as pt
import tests.unittest_tools as utt import tests.unittest_tools as utt
from pytensor import In, Out, config, grad from pytensor import In, config, grad
from pytensor.compile.function import function from pytensor.compile.function import function
from pytensor.compile.mode import Mode, get_default_mode from pytensor.compile.mode import Mode, get_default_mode
from pytensor.graph.basic import Apply, Variable from pytensor.graph.basic import Apply, Variable
...@@ -41,7 +40,6 @@ from pytensor.tensor.type import ( ...@@ -41,7 +40,6 @@ from pytensor.tensor.type import (
matrix, matrix,
scalar, scalar,
tensor, tensor,
tensor3,
vector, vector,
vectors, vectors,
) )
...@@ -80,30 +78,6 @@ def reduce_bitwise_and(x, axis=-1, dtype="int8"): ...@@ -80,30 +78,6 @@ def reduce_bitwise_and(x, axis=-1, dtype="int8"):
return np.apply_along_axis(custom_reduce, axis, x) return np.apply_along_axis(custom_reduce, axis, x)
def dimshuffle_benchmark(mode, c_contiguous, benchmark):
    """Benchmark every 3d transpose plus dim insertions on a single tensor3 input."""
    x = tensor3("x")
    if c_contiguous:
        x_val = np.random.random((2, 3, 4)).astype(config.floatX)
    else:
        # Non-contiguous input: transpose a much larger array.
        x_val = np.random.random((200, 300, 400)).transpose(1, 2, 0)
    outputs = [x.transpose(perm) for perm in itertools.permutations((0, 1, 2))]
    outputs += [x[None], x[:, None], x[:, :, None], x[:, :, :, None]]
    # Borrow to avoid deepcopy overhead
    fn = pytensor.function(
        [In(x, borrow=True)],
        [Out(out, borrow=True) for out in outputs],
        mode=mode,
    )
    fn.trust_input = True
    fn(x_val)  # JIT compile for JIT backends
    benchmark(fn, x_val)
class TestDimShuffle(unittest_tools.InferShapeTester): class TestDimShuffle(unittest_tools.InferShapeTester):
op = DimShuffle op = DimShuffle
type = TensorType type = TensorType
...@@ -261,10 +235,6 @@ class TestDimShuffle(unittest_tools.InferShapeTester): ...@@ -261,10 +235,6 @@ class TestDimShuffle(unittest_tools.InferShapeTester):
with pytest.raises(TypeError, match="input_ndim must be an integer"): with pytest.raises(TypeError, match="input_ndim must be an integer"):
DimShuffle(input_ndim=(True, False), new_order=(1, 0)) DimShuffle(input_ndim=(True, False), new_order=(1, 0))
@pytest.mark.parametrize("c_contiguous", [True, False])
def test_benchmark(self, c_contiguous, benchmark):
    """Run the shared dimshuffle benchmark in the C (FAST_RUN) backend."""
    dimshuffle_benchmark(
        mode="FAST_RUN", c_contiguous=c_contiguous, benchmark=benchmark
    )
class TestBroadcast: class TestBroadcast:
# this is to allow other types to reuse this class to test their ops # this is to allow other types to reuse this class to test their ops
...@@ -1077,38 +1047,6 @@ class TestVectorize: ...@@ -1077,38 +1047,6 @@ class TestVectorize:
assert vect_out.owner.inputs[0] is bool_tns assert vect_out.owner.inputs[0] is bool_tns
def careduce_benchmark_tester(axis, c_contiguous, mode, benchmark):
    """Compile a sum over ``axis`` of a (possibly transposed) 256^3 shared array and benchmark it."""
    side = 256
    data = np.random.uniform(size=(side, side, side))
    # A non-trivial permutation makes the reduction walk non-contiguous memory.
    perm = (0, 1, 2) if c_contiguous else (2, 0, 1)
    x = pytensor.shared(data, name="x", shape=data.shape)
    fn = pytensor.function([], x.transpose(perm).sum(axis=axis), mode=mode)
    # Verify correctness against NumPy before timing.
    np.testing.assert_allclose(fn(), data.transpose(perm).sum(axis=axis))
    benchmark(fn)
@pytest.mark.parametrize(
    "axis",
    (0, 1, 2, (0, 1), (0, 2), (1, 2), None),
    ids=lambda x: f"axis={x}",
)
@pytest.mark.parametrize(
    "c_contiguous",
    (True, False),
    ids=lambda x: f"c_contiguous={x}",
)
def test_c_careduce_benchmark(axis, c_contiguous, benchmark):
    """Run the CAReduce benchmark in the C (FAST_RUN) backend."""
    return careduce_benchmark_tester(
        axis=axis, c_contiguous=c_contiguous, mode="FAST_RUN", benchmark=benchmark
    )
def test_gradient_mixed_discrete_output_scalar_op(): def test_gradient_mixed_discrete_output_scalar_op():
class MixedDtypeScalarOp(ScalarOp): class MixedDtypeScalarOp(ScalarOp):
def make_node(self, *inputs): def make_node(self, *inputs):
......
...@@ -12,7 +12,7 @@ from pytensor.compile.mode import get_default_mode ...@@ -12,7 +12,7 @@ from pytensor.compile.mode import get_default_mode
from pytensor.configdefaults import config from pytensor.configdefaults import config
from pytensor.gradient import NullTypeGradError, verify_grad from pytensor.gradient import NullTypeGradError, verify_grad
from pytensor.scalar import ScalarLoop from pytensor.scalar import ScalarLoop
from pytensor.tensor import gammaincc, kn, kv, kve, vector from pytensor.tensor import kn, kv, kve, vector
from pytensor.tensor.elemwise import Elemwise from pytensor.tensor.elemwise import Elemwise
from tests import unittest_tools as utt from tests import unittest_tools as utt
from tests.tensor.utils import ( from tests.tensor.utils import (
...@@ -337,25 +337,6 @@ def test_gammainc_ddk_tabulated_values(): ...@@ -337,25 +337,6 @@ def test_gammainc_ddk_tabulated_values():
) )
def test_gammaincc_ddk_performance(benchmark):
    """Benchmark the gradient of gammaincc wrt k on its slower branch."""
    rng = np.random.default_rng(1)
    k = vector("k")
    x = vector("x")
    grad_fn = function(
        [k, x],
        grad(gammaincc(k, x).sum(), wrt=[k]),
        mode="FAST_RUN",
        trust_input=True,
    )
    # Values that hit the second branch of the gradient
    vals = [
        np.full((1000,), 3.2, dtype=k.dtype),
        np.full((1000,), 0.01, dtype=x.dtype),
    ]
    verify_grad(gammaincc, vals, rng=rng)
    benchmark(grad_fn, *vals)
TestGammaUBroadcast = makeBroadcastTester( TestGammaUBroadcast = makeBroadcastTester(
op=pt.gammau, op=pt.gammau,
expected=expected_gammau, expected=expected_gammau,
...@@ -888,30 +869,6 @@ class TestHyp2F1Grad: ...@@ -888,30 +869,6 @@ class TestHyp2F1Grad:
rtol=rtol, rtol=rtol,
) )
@pytest.mark.parametrize("case", (few_iters_case, many_iters_case))
@pytest.mark.parametrize("wrt", ("a", "all"))
def test_benchmark(self, case, wrt, benchmark):
    """Benchmark the hyp2f1 gradient and check it against tabulated values."""
    a1, a2, b1, z = pt.scalars("a1", "a2", "b1", "z")
    hyp2f1_out = pt.hyp2f1(a1, a2, b1, z)
    wrt_vars = a1 if wrt == "a" else [a1, a2, b1, z]
    f_grad = function([a1, a2, b1, z], pt.grad(hyp2f1_out, wrt=wrt_vars), trust_input=True)

    raw_inputs = case[:4]
    expected_dds = list(case[4:])
    # Cast each test value to its variable's dtype before calling.
    inputs = [
        np.array(val, dtype=var.dtype)
        for val, var in zip(raw_inputs, (a1, a2, b1, z))
    ]
    result = benchmark(f_grad, *inputs)

    rtol = 1e-9 if config.floatX == "float64" else 2e-3
    expected = expected_dds[0] if wrt == "a" else np.array(expected_dds)
    np.testing.assert_allclose(result, expected, rtol=rtol)
@pytest.mark.parametrize("wrt", ([0], [1], [2], [0, 1], [1, 2], [0, 2], [0, 1, 2])) @pytest.mark.parametrize("wrt", ([0], [1], [2], [0, 1], [1, 2], [0, 2], [0, 1, 2]))
def test_unused_grad_loop_opt(self, wrt): def test_unused_grad_loop_opt(self, wrt):
"""Test that we don't compute unnecessary outputs in the grad scalar loop""" """Test that we don't compute unnecessary outputs in the grad scalar loop"""
......
...@@ -4,7 +4,7 @@ import numpy as np ...@@ -4,7 +4,7 @@ import numpy as np
import pytest import pytest
import pytensor import pytensor
from pytensor import In, Mode, Out, function, grad from pytensor import Mode, function, grad
from pytensor.compile.ops import DeepCopyOp from pytensor.compile.ops import DeepCopyOp
from pytensor.configdefaults import config from pytensor.configdefaults import config
from pytensor.graph.basic import Variable, equal_computations from pytensor.graph.basic import Variable, equal_computations
...@@ -382,20 +382,6 @@ class TestReshape(utt.InferShapeTester, utt.OptimizationTestMixin): ...@@ -382,20 +382,6 @@ class TestReshape(utt.InferShapeTester, utt.OptimizationTestMixin):
np.arange(8).reshape(test_shape), np.arange(8).reshape(test_shape),
) )
def test_benchmark(self, benchmark):
    """Benchmark compiled reshapes of one tensor3 to three target shapes."""
    x = tensor3("x")
    x_val = np.random.random((2, 3, 4)).astype(config.floatX)
    target_shapes = [(6, 4), (2, 12), (-1,)]
    outs = [x.reshape(shp) for shp in target_shapes]
    # Borrow to avoid deepcopy overhead
    reshape_fn = pytensor.function(
        [In(x, borrow=True)],
        [Out(out, borrow=True) for out in outs],
    )
    reshape_fn.trust_input = True
    benchmark(reshape_fn, x_val)
def test_shape_i_hash(): def test_shape_i_hash():
assert isinstance(Shape_i(np.int64(1)).__hash__(), int) assert isinstance(Shape_i(np.int64(1)).__hash__(), int)
......
...@@ -81,16 +81,6 @@ def test_cholesky(): ...@@ -81,16 +81,6 @@ def test_cholesky():
check_upper_triangular(pd, ch_f) check_upper_triangular(pd, ch_f)
def test_cholesky_performance(benchmark):
    """Benchmark the Cholesky factorization of a small 10x10 SPD matrix."""
    rng = np.random.default_rng(utt.fetch_seed())
    r = rng.standard_normal((10, 10)).astype(config.floatX)
    # r @ r.T is symmetric positive (semi-)definite by construction.
    spd = np.dot(r, r.T)
    x = matrix()
    ch_f = function([x], cholesky(x))
    benchmark(ch_f, spd)
def test_cholesky_empty(): def test_cholesky_empty():
empty = np.empty([0, 0], dtype=config.floatX) empty = np.empty([0, 0], dtype=config.floatX)
x = matrix() x = matrix()
......
...@@ -3171,57 +3171,6 @@ def test_flip(size: tuple[int]): ...@@ -3171,57 +3171,6 @@ def test_flip(size: tuple[int]):
np.testing.assert_allclose(expected, f(x), atol=ATOL, rtol=RTOL) np.testing.assert_allclose(expected, f(x), atol=ATOL, rtol=RTOL)
class TestBenchmarks:
    """Benchmarks for AdvancedSubtensor1 / AdvancedIncSubtensor1 with repeated integer indices."""

    @pytest.mark.parametrize(
        "static_shape", (False, True), ids=lambda x: f"static_shape={x}"
    )
    @pytest.mark.parametrize("gc", (False, True), ids=lambda x: f"gc={x}")
    def test_advanced_subtensor1(self, static_shape, gc, benchmark):
        x = vector("x", shape=(85 if static_shape else None,))
        x_test = np.random.normal(size=(85,))
        idxs_test = np.arange(85).repeat(11)
        # With static shape and constant indices we know all idxs are valid
        # And can use faster mode in numpy.take
        indexed = x[idxs_test]
        fn = pytensor.function(
            [x],
            pytensor.Out(indexed, borrow=True),
            on_unused_input="ignore",
            trust_input=True,
        )
        fn.vm.allow_gc = gc
        benchmark(fn, x_test, idxs_test)

    @pytest.mark.parametrize(
        "static_shape", (False, True), ids=lambda x: f"static_shape={x}"
    )
    @pytest.mark.parametrize("gc", (False, True), ids=lambda x: f"gc={x}")
    @pytest.mark.parametrize("func", (inc_subtensor, set_subtensor))
    def test_advanced_incsubtensor1(self, func, static_shape, gc, benchmark):
        x = vector("x", shape=(85 if static_shape else None,))
        x_test = np.zeros((85,))
        buffer = ptb.zeros_like(x)
        y_test = np.random.normal(size=(85 * 11,))
        idxs_test = np.arange(85).repeat(11)
        # With static shape and constant indices we know all idxs are valid
        # Reuse same buffer of zeros, to check we rather allocate twice than copy inside IncSubtensor
        out1 = func(buffer[idxs_test], y_test)
        out2 = func(buffer[idxs_test[::-1]], y_test)
        fn = pytensor.function(
            [x],
            [pytensor.Out(out1, borrow=True), pytensor.Out(out2, borrow=True)],
            on_unused_input="ignore",
            trust_input=True,
        )
        fn.vm.allow_gc = gc
        benchmark(fn, x_test)
def test_subtensor_hash_and_eq(): def test_subtensor_hash_and_eq():
s1 = Subtensor(idx_list=[slice(None, None, None), 0]) s1 = Subtensor(idx_list=[slice(None, None, None), 0])
s2 = Subtensor(idx_list=[slice(None, None, None), 0]) s2 = Subtensor(idx_list=[slice(None, None, None), 0])
......
...@@ -35,7 +35,7 @@ from pytensor.graph.op import Op ...@@ -35,7 +35,7 @@ from pytensor.graph.op import Op
from pytensor.graph.traversal import graph_inputs from pytensor.graph.traversal import graph_inputs
from pytensor.scalar import float64 from pytensor.scalar import float64
from pytensor.scan.op import Scan from pytensor.scan.op import Scan
from pytensor.tensor.math import add, dot, exp, outer, sigmoid, sqr, sqrt, tanh from pytensor.tensor.math import add, dot, exp, outer, sigmoid, sqr, tanh
from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.math import sum as pt_sum
from pytensor.tensor.random import RandomStream from pytensor.tensor.random import RandomStream
from pytensor.tensor.type import ( from pytensor.tensor.type import (
...@@ -1136,33 +1136,6 @@ class TestJacobian: ...@@ -1136,33 +1136,6 @@ class TestJacobian:
val = np.ones((4, 4), dtype=config.floatX) val = np.ones((4, 4), dtype=config.floatX)
np.testing.assert_allclose(func_v(val, val), np.zeros((3, 2, 4, 4))) np.testing.assert_allclose(func_v(val, val), np.zeros((3, 2, 4, 4)))
def test_benchmark(self, vectorize, benchmark):
    """Benchmark the jacobian of ``outer(x, x)`` for a length-3 vector."""
    x = vector("x", shape=(3,))
    jac = jacobian(outer(x, x), x, vectorize=vectorize)
    fn = function([x], jac, trust_input=True)
    x_test = np.array([0, 1, 2], dtype=x.type.dtype)
    benchmark(fn, x_test)
def test_benchmark_partial_jacobian(self, vectorize, benchmark):
    """Benchmark extracting a small corner of a large jacobian.

    Example from https://github.com/jax-ml/jax/discussions/5904#discussioncomment-422956
    """
    N = 1000
    rng = np.random.default_rng(2025)
    x_test = rng.random((N,))
    f_mat = rng.random((N, N))

    x = vector("x", dtype="float64")
    out = sqrt(f_mat @ x / N)
    # Only a 5x5 corner of the full jacobian is requested.
    partial_jacobian = jacobian(out, x, vectorize=vectorize)[:5, :5]
    fn = pytensor.function([x], partial_jacobian, trust_input=True)
    benchmark(fn, x_test)
def test_hessian(): def test_hessian():
x = vector() x = vector()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论