Refactor numba lapack codegen

19023545 · Ricardo Vieira · Ricardo Vieira · 2774599e · 19023545 · 19023545
--- a/pytensor/link/numba/dispatch/basic.py
+++ b/pytensor/link/numba/dispatch/basic.py
@@ -75,7 +75,7 @@ def numba_njit(*args, fastmath=None, **kwargs):
        message=(
            "(\x1b\\[1m)*"  # ansi escape code for bold text
            "Cannot cache compiled function "
-            '"(numba_funcified_fgraph|store_core_outputs)" '
+            '"(numba_funcified_fgraph|store_core_outputs|cholesky|solve|solve_triangular|cho_solve)" '
            "as it uses dynamic globals"
        ),
        category=NumbaWarning,

--- a/pytensor/link/numba/dispatch/_LAPACK.py
+++ b/pytensor/link/numba/dispatch/_LAPACK.py
@@ -390,3 +390,70 @@ class _LAPACK:
            _ptr_int,  # INFO
        )
        return functype(lapack_ptr)
+    @classmethod
+    def numba_xgttrf(cls, dtype):
+        """
+        Compute the LU factorization of a tridiagonal matrix A using row interchanges.
+        Called by scipy.linalg.lu_factor
+        """
+        lapack_ptr, float_pointer = _get_lapack_ptr_and_ptr_type(dtype, "gttrf")
+        functype = ctypes.CFUNCTYPE(
+            None,
+            _ptr_int,  # N
+            float_pointer,  # DL
+            float_pointer,  # D
+            float_pointer,  # DU
+            float_pointer,  # DU2
+            _ptr_int,  # IPIV
+            _ptr_int,  # INFO
+        )
+        return functype(lapack_ptr)
+    @classmethod
+    def numba_xgttrs(cls, dtype):
+        """
+        Solve a system of linear equations A @ X = B with a tridiagonal matrix A using the LU factorization computed by numba_gttrf.
+        Called by scipy.linalg.lu_solve
+        """
+        lapack_ptr, float_pointer = _get_lapack_ptr_and_ptr_type(dtype, "gttrs")
+        functype = ctypes.CFUNCTYPE(
+            None,
+            _ptr_int,  # TRANS
+            _ptr_int,  # N
+            _ptr_int,  # NRHS
+            float_pointer,  # DL
+            float_pointer,  # D
+            float_pointer,  # DU
+            float_pointer,  # DU2
+            _ptr_int,  # IPIV
+            float_pointer,  # B
+            _ptr_int,  # LDB
+            _ptr_int,  # INFO
+        )
+        return functype(lapack_ptr)
+    @classmethod
+    def numba_xgtcon(cls, dtype):
+        """
+        Estimate the reciprocal of the condition number of a tridiagonal matrix A using the LU factorization computed by numba_gttrf.
+        """
+        lapack_ptr, float_pointer = _get_lapack_ptr_and_ptr_type(dtype, "gtcon")
+        functype = ctypes.CFUNCTYPE(
+            None,
+            _ptr_int,  # NORM
+            _ptr_int,  # N
+            float_pointer,  # DL
+            float_pointer,  # D
+            float_pointer,  # DU
+            float_pointer,  # DU2
+            _ptr_int,  # IPIV
+            float_pointer,  # ANORM
+            float_pointer,  # RCOND
+            float_pointer,  # WORK
+            _ptr_int,  # IWORK
+            _ptr_int,  # INFO
+        )
+        return functype(lapack_ptr)
--- a/pytensor/link/numba/dispatch/linalg/__init__.py
+++ b/pytensor/link/numba/dispatch/linalg/__init__.py
--- a/pytensor/link/numba/dispatch/linalg/decomposition/__init__.py
+++ b/pytensor/link/numba/dispatch/linalg/decomposition/__init__.py
--- a/pytensor/link/numba/dispatch/linalg/decomposition/cholesky.py
+++ b/pytensor/link/numba/dispatch/linalg/decomposition/cholesky.py
+import numpy as np
+from numba.core.extending import overload
+from numba.np.linalg import _copy_to_fortran_order, ensure_lapack
+from scipy import linalg
+from pytensor.link.numba.dispatch.linalg._LAPACK import (
+    _LAPACK,
+    _get_underlying_float,
+    int_ptr_to_val,
+    val_to_int_ptr,
+)
+from pytensor.link.numba.dispatch.linalg.utils import _check_scipy_linalg_matrix
+def _cholesky(a, lower=False, overwrite_a=False, check_finite=True):
+    return (
+        linalg.cholesky(
+            a, lower=lower, overwrite_a=overwrite_a, check_finite=check_finite
+        ),
+        0,
+    )
+@overload(_cholesky)
+def cholesky_impl(A, lower=0, overwrite_a=False, check_finite=True):
+    ensure_lapack()
+    _check_scipy_linalg_matrix(A, "cholesky")
+    dtype = A.dtype
+    w_type = _get_underlying_float(dtype)
+    numba_potrf = _LAPACK().numba_xpotrf(dtype)
+    def impl(A, lower=0, overwrite_a=False, check_finite=True):
+        _N = np.int32(A.shape[-1])
+        if A.shape[-2] != _N:
+            raise linalg.LinAlgError("Last 2 dimensions of A must be square")
+        UPLO = val_to_int_ptr(ord("L") if lower else ord("U"))
+        N = val_to_int_ptr(_N)
+        LDA = val_to_int_ptr(_N)
+        INFO = val_to_int_ptr(0)
+        if overwrite_a and A.flags.f_contiguous:
+            A_copy = A
+        else:
+            A_copy = _copy_to_fortran_order(A)
+        numba_potrf(
+            UPLO,
+            N,
+            A_copy.view(w_type).ctypes,
+            LDA,
+            INFO,
+        )
+        if lower:
+            for j in range(1, _N):
+                for i in range(j):
+                    A_copy[i, j] = 0.0
+        else:
+            for j in range(_N):
+                for i in range(j + 1, _N):
+                    A_copy[i, j] = 0.0
+        return A_copy, int_ptr_to_val(INFO)
+    return impl
--- a/pytensor/link/numba/dispatch/linalg/solve/__init__.py
+++ b/pytensor/link/numba/dispatch/linalg/solve/__init__.py
--- a/pytensor/link/numba/dispatch/linalg/solve/cholesky.py
+++ b/pytensor/link/numba/dispatch/linalg/solve/cholesky.py
+import numpy as np
+from numba.core.extending import overload
+from numba.np.linalg import ensure_lapack
+from scipy import linalg
+from pytensor.link.numba.dispatch.linalg._LAPACK import (
+    _LAPACK,
+    _get_underlying_float,
+    int_ptr_to_val,
+    val_to_int_ptr,
+)
+from pytensor.link.numba.dispatch.linalg.solve.utils import _solve_check_input_shapes
+from pytensor.link.numba.dispatch.linalg.utils import (
+    _check_scipy_linalg_matrix,
+    _copy_to_fortran_order_even_if_1d,
+    _solve_check,
+)
+def _cho_solve(
+    C: np.ndarray, B: np.ndarray, lower: bool, overwrite_b: bool, check_finite: bool
+):
+    """
+    Solve a positive-definite linear system using the Cholesky decomposition.
+    """
+    return linalg.cho_solve(
+        (C, lower), b=B, overwrite_b=overwrite_b, check_finite=check_finite
+    )
+@overload(_cho_solve)
+def cho_solve_impl(C, B, lower=False, overwrite_b=False, check_finite=True):
+    ensure_lapack()
+    _check_scipy_linalg_matrix(C, "cho_solve")
+    _check_scipy_linalg_matrix(B, "cho_solve")
+    dtype = C.dtype
+    w_type = _get_underlying_float(dtype)
+    numba_potrs = _LAPACK().numba_xpotrs(dtype)
+    def impl(C, B, lower=False, overwrite_b=False, check_finite=True):
+        _solve_check_input_shapes(C, B)
+        _N = np.int32(C.shape[-1])
+        if C.flags.f_contiguous or C.flags.c_contiguous:
+            C_f = C
+            if C.flags.c_contiguous:
+                # An upper/lower triangular c_contiguous is the same as a lower/upper triangular f_contiguous
+                lower = not lower
+        else:
+            C_f = np.asfortranarray(C)
+        if overwrite_b and B.flags.f_contiguous:
+            B_copy = B
+        else:
+            B_copy = _copy_to_fortran_order_even_if_1d(B)
+        B_is_1d = B.ndim == 1
+        if B_is_1d:
+            B_copy = np.expand_dims(B_copy, -1)
+        NRHS = 1 if B_is_1d else int(B.shape[-1])
+        UPLO = val_to_int_ptr(ord("L") if lower else ord("U"))
+        N = val_to_int_ptr(_N)
+        NRHS = val_to_int_ptr(NRHS)
+        LDA = val_to_int_ptr(_N)
+        LDB = val_to_int_ptr(_N)
+        INFO = val_to_int_ptr(0)
+        numba_potrs(
+            UPLO,
+            N,
+            NRHS,
+            C_f.view(w_type).ctypes,
+            LDA,
+            B_copy.view(w_type).ctypes,
+            LDB,
+            INFO,
+        )
+        _solve_check(_N, int_ptr_to_val(INFO))
+        if B_is_1d:
+            return B_copy[..., 0]
+        return B_copy
+    return impl
--- a/pytensor/link/numba/dispatch/linalg/solve/general.py
+++ b/pytensor/link/numba/dispatch/linalg/solve/general.py
+from collections.abc import Callable
+import numpy as np
+from numba.core.extending import overload
+from numba.np.linalg import _copy_to_fortran_order, ensure_lapack
+from scipy import linalg
+from pytensor.link.numba.dispatch.linalg._LAPACK import (
+    _LAPACK,
+    _get_underlying_float,
+    int_ptr_to_val,
+    val_to_int_ptr,
+)
+from pytensor.link.numba.dispatch.linalg.solve.norm import _xlange
+from pytensor.link.numba.dispatch.linalg.solve.utils import _solve_check_input_shapes
+from pytensor.link.numba.dispatch.linalg.utils import (
+    _check_scipy_linalg_matrix,
+    _copy_to_fortran_order_even_if_1d,
+    _solve_check,
+    _trans_char_to_int,
+)
+def _xgecon(A: np.ndarray, A_norm: float, norm: str) -> tuple[np.ndarray, int]:
+    """
+    Placeholder for computing the condition number of a matrix; used by linalg.solve. Not used by pytensor to numbify
+    graphs.
+    """
+    return  # type: ignore
+@overload(_xgecon)
+def xgecon_impl(
+    A: np.ndarray, A_norm: float, norm: str
+) -> Callable[[np.ndarray, float, str], tuple[np.ndarray, int]]:
+    """
+    Compute the condition number of a matrix A.
+    """
+    ensure_lapack()
+    _check_scipy_linalg_matrix(A, "gecon")
+    dtype = A.dtype
+    w_type = _get_underlying_float(dtype)
+    numba_gecon = _LAPACK().numba_xgecon(dtype)
+    def impl(A: np.ndarray, A_norm: float, norm: str) -> tuple[np.ndarray, int]:
+        _N = np.int32(A.shape[-1])
+        A_copy = _copy_to_fortran_order(A)
+        N = val_to_int_ptr(_N)
+        LDA = val_to_int_ptr(_N)
+        A_NORM = np.array(A_norm, dtype=dtype)
+        NORM = val_to_int_ptr(ord(norm))
+        RCOND = np.empty(1, dtype=dtype)
+        WORK = np.empty(4 * _N, dtype=dtype)
+        IWORK = np.empty(_N, dtype=np.int32)
+        INFO = val_to_int_ptr(1)
+        numba_gecon(
+            NORM,
+            N,
+            A_copy.view(w_type).ctypes,
+            LDA,
+            A_NORM.view(w_type).ctypes,
+            RCOND.view(w_type).ctypes,
+            WORK.view(w_type).ctypes,
+            IWORK.ctypes,
+            INFO,
+        )
+        return RCOND, int_ptr_to_val(INFO)
+    return impl
+def _getrf(A, overwrite_a=False) -> tuple[np.ndarray, np.ndarray, int]:
+    """
+    Placeholder for LU factorization; used by linalg.solve.
+    # TODO: Implement an LU_factor Op, then dispatch to this function in numba mode.
+    """
+    return  # type: ignore
+@overload(_getrf)
+def getrf_impl(
+    A: np.ndarray, overwrite_a: bool = False
+) -> Callable[[np.ndarray, bool], tuple[np.ndarray, np.ndarray, int]]:
+    ensure_lapack()
+    _check_scipy_linalg_matrix(A, "getrf")
+    dtype = A.dtype
+    w_type = _get_underlying_float(dtype)
+    numba_getrf = _LAPACK().numba_xgetrf(dtype)
+    def impl(
+        A: np.ndarray, overwrite_a: bool = False
+    ) -> tuple[np.ndarray, np.ndarray, int]:
+        _M, _N = np.int32(A.shape[-2:])  # type: ignore
+        if overwrite_a and A.flags.f_contiguous:
+            A_copy = A
+        else:
+            A_copy = _copy_to_fortran_order(A)
+        M = val_to_int_ptr(_M)  # type: ignore
+        N = val_to_int_ptr(_N)  # type: ignore
+        LDA = val_to_int_ptr(_M)  # type: ignore
+        IPIV = np.empty(_N, dtype=np.int32)  # type: ignore
+        INFO = val_to_int_ptr(0)
+        numba_getrf(M, N, A_copy.view(w_type).ctypes, LDA, IPIV.ctypes, INFO)
+        return A_copy, IPIV, int_ptr_to_val(INFO)
+    return impl
+def _getrs(
+    LU: np.ndarray, B: np.ndarray, IPIV: np.ndarray, trans: int, overwrite_b: bool
+) -> tuple[np.ndarray, int]:
+    """
+    Placeholder for solving a linear system with a matrix that has been LU-factored; used by linalg.solve.
+    # TODO: Implement an LU_solve Op, then dispatch to this function in numba mode.
+    """
+    return  # type: ignore
+@overload(_getrs)
+def getrs_impl(
+    LU: np.ndarray, B: np.ndarray, IPIV: np.ndarray, trans: int, overwrite_b: bool
+) -> Callable[[np.ndarray, np.ndarray, np.ndarray, int, bool], tuple[np.ndarray, int]]:
+    ensure_lapack()
+    _check_scipy_linalg_matrix(LU, "getrs")
+    _check_scipy_linalg_matrix(B, "getrs")
+    dtype = LU.dtype
+    w_type = _get_underlying_float(dtype)
+    numba_getrs = _LAPACK().numba_xgetrs(dtype)
+    def impl(
+        LU: np.ndarray, B: np.ndarray, IPIV: np.ndarray, trans: int, overwrite_b: bool
+    ) -> tuple[np.ndarray, int]:
+        _N = np.int32(LU.shape[-1])
+        _solve_check_input_shapes(LU, B)
+        B_is_1d = B.ndim == 1
+        if overwrite_b and B.flags.f_contiguous:
+            B_copy = B
+        else:
+            B_copy = _copy_to_fortran_order_even_if_1d(B)
+        if B_is_1d:
+            B_copy = np.expand_dims(B_copy, -1)
+        NRHS = 1 if B_is_1d else int(B_copy.shape[-1])
+        TRANS = val_to_int_ptr(_trans_char_to_int(trans))
+        N = val_to_int_ptr(_N)
+        NRHS = val_to_int_ptr(NRHS)
+        LDA = val_to_int_ptr(_N)
+        LDB = val_to_int_ptr(_N)
+        IPIV = _copy_to_fortran_order(IPIV)
+        INFO = val_to_int_ptr(0)
+        numba_getrs(
+            TRANS,
+            N,
+            NRHS,
+            LU.view(w_type).ctypes,
+            LDA,
+            IPIV.ctypes,
+            B_copy.view(w_type).ctypes,
+            LDB,
+            INFO,
+        )
+        if B_is_1d:
+            B_copy = B_copy[..., 0]
+        return B_copy, int_ptr_to_val(INFO)
+    return impl
+def _solve_gen(
+    A: np.ndarray,
+    B: np.ndarray,
+    lower: bool,
+    overwrite_a: bool,
+    overwrite_b: bool,
+    check_finite: bool,
+    transposed: bool,
+):
+    """Thin wrapper around scipy.linalg.solve. Used as an overload target for numba to avoid unexpected side-effects
+    for users who import pytensor."""
+    return linalg.solve(
+        A,
+        B,
+        lower=lower,
+        overwrite_a=overwrite_a,
+        overwrite_b=overwrite_b,
+        check_finite=check_finite,
+        assume_a="gen",
+        transposed=transposed,
+    )
+@overload(_solve_gen)
+def solve_gen_impl(
+    A: np.ndarray,
+    B: np.ndarray,
+    lower: bool,
+    overwrite_a: bool,
+    overwrite_b: bool,
+    check_finite: bool,
+    transposed: bool,
+) -> Callable[[np.ndarray, np.ndarray, bool, bool, bool, bool, bool], np.ndarray]:
+    ensure_lapack()
+    _check_scipy_linalg_matrix(A, "solve")
+    _check_scipy_linalg_matrix(B, "solve")
+    def impl(
+        A: np.ndarray,
+        B: np.ndarray,
+        lower: bool,
+        overwrite_a: bool,
+        overwrite_b: bool,
+        check_finite: bool,
+        transposed: bool,
+    ) -> np.ndarray:
+        _N = np.int32(A.shape[-1])
+        _solve_check_input_shapes(A, B)
+        if overwrite_a and A.flags.c_contiguous:
+            # Work with the transposed system to avoid copying A
+            A = A.T
+            transposed = not transposed
+        order = "I" if transposed else "1"
+        norm = _xlange(A, order=order)
+        N = A.shape[1]
+        LU, IPIV, INFO = _getrf(A, overwrite_a=overwrite_a)
+        _solve_check(N, INFO)
+        X, INFO = _getrs(
+            LU=LU, B=B, IPIV=IPIV, trans=transposed, overwrite_b=overwrite_b
+        )
+        _solve_check(N, INFO)
+        RCOND, INFO = _xgecon(LU, norm, "1")
+        _solve_check(N, INFO, True, RCOND)
+        return X
+    return impl
--- a/pytensor/link/numba/dispatch/linalg/solve/norm.py
+++ b/pytensor/link/numba/dispatch/linalg/solve/norm.py
+from collections.abc import Callable
+import numpy as np
+from numba.core.extending import overload
+from numba.np.linalg import _copy_to_fortran_order, ensure_lapack
+from pytensor.link.numba.dispatch.linalg._LAPACK import (
+    _LAPACK,
+    _get_underlying_float,
+    val_to_int_ptr,
+)
+from pytensor.link.numba.dispatch.linalg.utils import _check_scipy_linalg_matrix
+def _xlange(A: np.ndarray, order: str | None = None) -> float:
+    """
+    Placeholder for computing the norm of a matrix; used by linalg.solve. Will never be called in python mode.
+    """
+    return  # type: ignore
+@overload(_xlange)
+def xlange_impl(
+    A: np.ndarray, order: str | None = None
+) -> Callable[[np.ndarray, str], float]:
+    """
+    xLANGE returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the  element of
+    largest absolute value of a matrix A.
+    """
+    ensure_lapack()
+    _check_scipy_linalg_matrix(A, "norm")
+    dtype = A.dtype
+    w_type = _get_underlying_float(dtype)
+    numba_lange = _LAPACK().numba_xlange(dtype)
+    def impl(A: np.ndarray, order: str | None = None):
+        _M, _N = np.int32(A.shape[-2:])  # type: ignore
+        A_copy = _copy_to_fortran_order(A)
+        M = val_to_int_ptr(_M)  # type: ignore
+        N = val_to_int_ptr(_N)  # type: ignore
+        LDA = val_to_int_ptr(_M)  # type: ignore
+        NORM = (
+            val_to_int_ptr(ord(order))
+            if order is not None
+            else val_to_int_ptr(ord("1"))
+        )
+        WORK = np.empty(_M, dtype=dtype)  # type: ignore
+        result = numba_lange(
+            NORM, M, N, A_copy.view(w_type).ctypes, LDA, WORK.view(w_type).ctypes
+        )
+        return result
+    return impl
--- a/pytensor/link/numba/dispatch/linalg/solve/posdef.py
+++ b/pytensor/link/numba/dispatch/linalg/solve/posdef.py
+from collections.abc import Callable
+import numpy as np
+from numba.core.extending import overload
+from numba.np.linalg import _copy_to_fortran_order, ensure_lapack
+from scipy import linalg
+from pytensor.link.numba.dispatch.linalg._LAPACK import (
+    _LAPACK,
+    _get_underlying_float,
+    int_ptr_to_val,
+    val_to_int_ptr,
+)
+from pytensor.link.numba.dispatch.linalg.solve.norm import _xlange
+from pytensor.link.numba.dispatch.linalg.solve.utils import _solve_check_input_shapes
+from pytensor.link.numba.dispatch.linalg.utils import (
+    _check_scipy_linalg_matrix,
+    _copy_to_fortran_order_even_if_1d,
+    _solve_check,
+)
+def _posv(
+    A: np.ndarray,
+    B: np.ndarray,
+    lower: bool,
+    overwrite_a: bool,
+    overwrite_b: bool,
+    check_finite: bool,
+    transposed: bool,
+) -> tuple[np.ndarray, np.ndarray, int]:
+    """
+    Placeholder for solving a linear system with a positive-definite matrix; used by linalg.solve.
+    """
+    return  # type: ignore
+@overload(_posv)
+def posv_impl(
+    A: np.ndarray,
+    B: np.ndarray,
+    lower: bool,
+    overwrite_a: bool,
+    overwrite_b: bool,
+    check_finite: bool,
+    transposed: bool,
+) -> Callable[
+    [np.ndarray, np.ndarray, bool, bool, bool, bool, bool],
+    tuple[np.ndarray, np.ndarray, int],
+]:
+    ensure_lapack()
+    _check_scipy_linalg_matrix(A, "solve")
+    _check_scipy_linalg_matrix(B, "solve")
+    dtype = A.dtype
+    w_type = _get_underlying_float(dtype)
+    numba_posv = _LAPACK().numba_xposv(dtype)
+    def impl(
+        A: np.ndarray,
+        B: np.ndarray,
+        lower: bool,
+        overwrite_a: bool,
+        overwrite_b: bool,
+        check_finite: bool,
+        transposed: bool,
+    ) -> tuple[np.ndarray, np.ndarray, int]:
+        _solve_check_input_shapes(A, B)
+        _N = np.int32(A.shape[-1])
+        if overwrite_a and (A.flags.f_contiguous or A.flags.c_contiguous):
+            A_copy = A
+            if A.flags.c_contiguous:
+                # An upper/lower triangular c_contiguous is the same as a lower/upper triangular f_contiguous
+                lower = not lower
+        else:
+            A_copy = _copy_to_fortran_order(A)
+        B_is_1d = B.ndim == 1
+        if overwrite_b and B.flags.f_contiguous:
+            B_copy = B
+        else:
+            B_copy = _copy_to_fortran_order_even_if_1d(B)
+        if B_is_1d:
+            B_copy = np.expand_dims(B_copy, -1)
+        UPLO = val_to_int_ptr(ord("L") if lower else ord("U"))
+        NRHS = 1 if B_is_1d else int(B.shape[-1])
+        N = val_to_int_ptr(_N)
+        NRHS = val_to_int_ptr(NRHS)
+        LDA = val_to_int_ptr(_N)
+        LDB = val_to_int_ptr(_N)
+        INFO = val_to_int_ptr(0)
+        numba_posv(
+            UPLO,
+            N,
+            NRHS,
+            A_copy.view(w_type).ctypes,
+            LDA,
+            B_copy.view(w_type).ctypes,
+            LDB,
+            INFO,
+        )
+        if B_is_1d:
+            B_copy = B_copy[..., 0]
+        return A_copy, B_copy, int_ptr_to_val(INFO)
+    return impl
+def _pocon(A: np.ndarray, anorm: float) -> tuple[np.ndarray, int]:
+    """
+    Placeholder for computing the condition number of a cholesky-factorized positive-definite matrix. Used by
+    linalg.solve when assume_a = "pos".
+    """
+    return  # type: ignore
+@overload(_pocon)
+def pocon_impl(
+    A: np.ndarray, anorm: float
+) -> Callable[[np.ndarray, float], tuple[np.ndarray, int]]:
+    ensure_lapack()
+    _check_scipy_linalg_matrix(A, "pocon")
+    dtype = A.dtype
+    w_type = _get_underlying_float(dtype)
+    numba_pocon = _LAPACK().numba_xpocon(dtype)
+    def impl(A: np.ndarray, anorm: float):
+        _N = np.int32(A.shape[-1])
+        A_copy = _copy_to_fortran_order(A)
+        UPLO = val_to_int_ptr(ord("L"))
+        N = val_to_int_ptr(_N)
+        LDA = val_to_int_ptr(_N)
+        ANORM = np.array(anorm, dtype=dtype)
+        RCOND = np.empty(1, dtype=dtype)
+        WORK = np.empty(3 * _N, dtype=dtype)
+        IWORK = np.empty(_N, dtype=np.int32)
+        INFO = val_to_int_ptr(0)
+        numba_pocon(
+            UPLO,
+            N,
+            A_copy.view(w_type).ctypes,
+            LDA,
+            ANORM.view(w_type).ctypes,
+            RCOND.view(w_type).ctypes,
+            WORK.view(w_type).ctypes,
+            IWORK.ctypes,
+            INFO,
+        )
+        return RCOND, int_ptr_to_val(INFO)
+    return impl
+def _solve_psd(
+    A: np.ndarray,
+    B: np.ndarray,
+    lower: bool,
+    overwrite_a: bool,
+    overwrite_b: bool,
+    check_finite: bool,
+    transposed: bool,
+):
+    """Thin wrapper around scipy.linalg.solve for positive-definite matrices. Used as an overload target for numba to
+    avoid unexpected side-effects when users import pytensor."""
+    return linalg.solve(
+        A,
+        B,
+        lower=lower,
+        overwrite_a=overwrite_a,
+        overwrite_b=overwrite_b,
+        check_finite=check_finite,
+        transposed=transposed,
+        assume_a="pos",
+    )
+@overload(_solve_psd)
+def solve_psd_impl(
+    A: np.ndarray,
+    B: np.ndarray,
+    lower: bool,
+    overwrite_a: bool,
+    overwrite_b: bool,
+    check_finite: bool,
+    transposed: bool,
+) -> Callable[[np.ndarray, np.ndarray, bool, bool, bool, bool, bool], np.ndarray]:
+    ensure_lapack()
+    _check_scipy_linalg_matrix(A, "solve")
+    _check_scipy_linalg_matrix(B, "solve")
+    def impl(
+        A: np.ndarray,
+        B: np.ndarray,
+        lower: bool,
+        overwrite_a: bool,
+        overwrite_b: bool,
+        check_finite: bool,
+        transposed: bool,
+    ) -> np.ndarray:
+        _solve_check_input_shapes(A, B)
+        C, x, info = _posv(
+            A, B, lower, overwrite_a, overwrite_b, check_finite, transposed
+        )
+        _solve_check(A.shape[-1], info)
+        rcond, info = _pocon(C, _xlange(A))
+        _solve_check(A.shape[-1], info=info, lamch=True, rcond=rcond)
+        return x
+    return impl
--- a/pytensor/link/numba/dispatch/linalg/solve/symmetric.py
+++ b/pytensor/link/numba/dispatch/linalg/solve/symmetric.py
+from collections.abc import Callable
+import numpy as np
+from numba.core.extending import overload
+from numba.np.linalg import _copy_to_fortran_order, ensure_lapack
+from scipy import linalg
+from pytensor.link.numba.dispatch.linalg._LAPACK import (
+    _LAPACK,
+    _get_underlying_float,
+    int_ptr_to_val,
+    val_to_int_ptr,
+)
+from pytensor.link.numba.dispatch.linalg.solve.norm import _xlange
+from pytensor.link.numba.dispatch.linalg.solve.utils import _solve_check_input_shapes
+from pytensor.link.numba.dispatch.linalg.utils import (
+    _check_scipy_linalg_matrix,
+    _copy_to_fortran_order_even_if_1d,
+    _solve_check,
+)
+def _sysv(
+    A: np.ndarray, B: np.ndarray, lower: bool, overwrite_a: bool, overwrite_b: bool
+) -> tuple[np.ndarray, np.ndarray, np.ndarray, int]:
+    """
+    Placeholder for solving a linear system with a symmetric matrix; used by linalg.solve.
+    """
+    return  # type: ignore
+@overload(_sysv)
+def sysv_impl(
+    A: np.ndarray, B: np.ndarray, lower: bool, overwrite_a: bool, overwrite_b: bool
+) -> Callable[
+    [np.ndarray, np.ndarray, bool, bool, bool],
+    tuple[np.ndarray, np.ndarray, np.ndarray, int],
+]:
+    ensure_lapack()
+    _check_scipy_linalg_matrix(A, "sysv")
+    _check_scipy_linalg_matrix(B, "sysv")
+    dtype = A.dtype
+    w_type = _get_underlying_float(dtype)
+    numba_sysv = _LAPACK().numba_xsysv(dtype)
+    def impl(
+        A: np.ndarray, B: np.ndarray, lower: bool, overwrite_a: bool, overwrite_b: bool
+    ):
+        _LDA, _N = np.int32(A.shape[-2:])  # type: ignore
+        _solve_check_input_shapes(A, B)
+        if overwrite_a and (A.flags.f_contiguous or A.flags.c_contiguous):
+            A_copy = A
+            if A.flags.c_contiguous:
+                # An upper/lower triangular c_contiguous is the same as a lower/upper triangular f_contiguous
+                lower = not lower
+        else:
+            A_copy = _copy_to_fortran_order(A)
+        B_is_1d = B.ndim == 1
+        if overwrite_b and B.flags.f_contiguous:
+            B_copy = B
+        else:
+            B_copy = _copy_to_fortran_order_even_if_1d(B)
+        if B_is_1d:
+            B_copy = np.expand_dims(B_copy, -1)
+        NRHS = 1 if B_is_1d else int(B.shape[-1])
+        UPLO = val_to_int_ptr(ord("L") if lower else ord("U"))
+        N = val_to_int_ptr(_N)  # type: ignore
+        NRHS = val_to_int_ptr(NRHS)
+        LDA = val_to_int_ptr(_LDA)  # type: ignore
+        IPIV = np.empty(_N, dtype=np.int32)  # type: ignore
+        LDB = val_to_int_ptr(_N)  # type: ignore
+        WORK = np.empty(1, dtype=dtype)
+        LWORK = val_to_int_ptr(-1)
+        INFO = val_to_int_ptr(0)
+        # Workspace query
+        numba_sysv(
+            UPLO,
+            N,
+            NRHS,
+            A_copy.view(w_type).ctypes,
+            LDA,
+            IPIV.ctypes,
+            B_copy.view(w_type).ctypes,
+            LDB,
+            WORK.view(w_type).ctypes,
+            LWORK,
+            INFO,
+        )
+        WS_SIZE = np.int32(WORK[0].real)
+        LWORK = val_to_int_ptr(WS_SIZE)
+        WORK = np.empty(WS_SIZE, dtype=dtype)
+        # Actual solve
+        numba_sysv(
+            UPLO,
+            N,
+            NRHS,
+            A_copy.view(w_type).ctypes,
+            LDA,
+            IPIV.ctypes,
+            B_copy.view(w_type).ctypes,
+            LDB,
+            WORK.view(w_type).ctypes,
+            LWORK,
+            INFO,
+        )
+        if B_is_1d:
+            B_copy = B_copy[..., 0]
+        return A_copy, B_copy, IPIV, int_ptr_to_val(INFO)
+    return impl
+def _sycon(A: np.ndarray, ipiv: np.ndarray, anorm: float) -> tuple[np.ndarray, int]:
+    """
+    Placeholder for computing the condition number of a symmetric matrix; used by linalg.solve. Never called in
+    python mode.
+    """
+    return  # type: ignore
+@overload(_sycon)
+def sycon_impl(
+    A: np.ndarray, ipiv: np.ndarray, anorm: float
+) -> Callable[[np.ndarray, np.ndarray, float], tuple[np.ndarray, int]]:
+    ensure_lapack()
+    _check_scipy_linalg_matrix(A, "sycon")
+    dtype = A.dtype
+    w_type = _get_underlying_float(dtype)
+    numba_sycon = _LAPACK().numba_xsycon(dtype)
+    def impl(A: np.ndarray, ipiv: np.ndarray, anorm: float) -> tuple[np.ndarray, int]:
+        _N = np.int32(A.shape[-1])
+        A_copy = _copy_to_fortran_order(A)
+        N = val_to_int_ptr(_N)
+        LDA = val_to_int_ptr(_N)
+        UPLO = val_to_int_ptr(ord("U"))
+        ANORM = np.array(anorm, dtype=dtype)
+        RCOND = np.empty(1, dtype=dtype)
+        WORK = np.empty(2 * _N, dtype=dtype)
+        IWORK = np.empty(_N, dtype=np.int32)
+        INFO = val_to_int_ptr(0)
+        numba_sycon(
+            UPLO,
+            N,
+            A_copy.view(w_type).ctypes,
+            LDA,
+            ipiv.ctypes,
+            ANORM.view(w_type).ctypes,
+            RCOND.view(w_type).ctypes,
+            WORK.view(w_type).ctypes,
+            IWORK.ctypes,
+            INFO,
+        )
+        return RCOND, int_ptr_to_val(INFO)
+    return impl
+def _solve_symmetric(
+    A: np.ndarray,
+    B: np.ndarray,
+    lower: bool,
+    overwrite_a: bool,
+    overwrite_b: bool,
+    check_finite: bool,
+    transposed: bool,
+):
+    """Thin wrapper around scipy.linalg.solve for symmetric matrices. Used as an overload target for numba to avoid
+    unexpected side-effects when users import pytensor."""
+    return linalg.solve(
+        A,
+        B,
+        lower=lower,
+        overwrite_a=overwrite_a,
+        overwrite_b=overwrite_b,
+        check_finite=check_finite,
+        assume_a="sym",
+        transposed=transposed,
+    )
+@overload(_solve_symmetric)
+def solve_symmetric_impl(
+    A: np.ndarray,
+    B: np.ndarray,
+    lower: bool,
+    overwrite_a: bool,
+    overwrite_b: bool,
+    check_finite: bool,
+    transposed: bool,
+) -> Callable[[np.ndarray, np.ndarray, bool, bool, bool, bool, bool], np.ndarray]:
+    ensure_lapack()
+    _check_scipy_linalg_matrix(A, "solve")
+    _check_scipy_linalg_matrix(B, "solve")
+    def impl(
+        A: np.ndarray,
+        B: np.ndarray,
+        lower: bool,
+        overwrite_a: bool,
+        overwrite_b: bool,
+        check_finite: bool,
+        transposed: bool,
+    ) -> np.ndarray:
+        _solve_check_input_shapes(A, B)
+        lu, x, ipiv, info = _sysv(A, B, lower, overwrite_a, overwrite_b)
+        _solve_check(A.shape[-1], info)
+        rcond, info = _sycon(lu, ipiv, _xlange(A, order="I"))
+        _solve_check(A.shape[-1], info, True, rcond)
+        return x
+    return impl
--- a/pytensor/link/numba/dispatch/linalg/solve/triangular.py
+++ b/pytensor/link/numba/dispatch/linalg/solve/triangular.py
+import numpy as np
+from numba.core import types
+from numba.core.extending import overload
+from numba.np.linalg import ensure_lapack
+from scipy import linalg
+from pytensor.link.numba.dispatch.linalg._LAPACK import (
+    _LAPACK,
+    _get_underlying_float,
+    int_ptr_to_val,
+    val_to_int_ptr,
+)
+from pytensor.link.numba.dispatch.linalg.solve.utils import _solve_check_input_shapes
+from pytensor.link.numba.dispatch.linalg.utils import (
+    _check_scipy_linalg_matrix,
+    _copy_to_fortran_order_even_if_1d,
+    _solve_check,
+    _trans_char_to_int,
+)
+def _solve_triangular(
+    A, B, trans=0, lower=False, unit_diagonal=False, b_ndim=1, overwrite_b=False
+):
+    """
+    Thin wrapper around scipy.linalg.solve_triangular.
+    This function is overloaded instead of the original scipy function to avoid unexpected side-effects to users who
+    import pytensor.
+    The signature must be the same as solve_triangular_impl, so b_ndim is included, although this argument is not
+    used by scipy.linalg.solve_triangular.
+    """
+    return linalg.solve_triangular(
+        A,
+        B,
+        trans=trans,
+        lower=lower,
+        unit_diagonal=unit_diagonal,
+        overwrite_b=overwrite_b,
+    )
+@overload(_solve_triangular)
+def solve_triangular_impl(A, B, trans, lower, unit_diagonal, b_ndim, overwrite_b):
+    ensure_lapack()
+    _check_scipy_linalg_matrix(A, "solve_triangular")
+    _check_scipy_linalg_matrix(B, "solve_triangular")
+    dtype = A.dtype
+    w_type = _get_underlying_float(dtype)
+    numba_trtrs = _LAPACK().numba_xtrtrs(dtype)
+    if isinstance(dtype, types.Complex):
+        # If you want to make this work with complex numbers make sure you handle the c_contiguous trick correctly
+        raise TypeError(
+            "This function is not expected to work with complex numbers yet"
+        )
+    def impl(A, B, trans, lower, unit_diagonal, b_ndim, overwrite_b):
+        _N = np.int32(A.shape[-1])
+        _solve_check_input_shapes(A, B)
+        # Seems weird to not use the b_ndim input directly, but when I did that Numba complained that the output type
+        # could potentially be 3d (it didn't understand b_ndim was always equal to B.ndim)
+        B_is_1d = B.ndim == 1
+        if A.flags.f_contiguous or (A.flags.c_contiguous and trans in (0, 1)):
+            A_f = A
+            if A.flags.c_contiguous:
+                # An upper/lower triangular c_contiguous is the same as a lower/upper triangular f_contiguous
+                # Is this valid for complex matrices that were .conj().mT by PyTensor?
+                lower = not lower
+                trans = 1 - trans
+        else:
+            A_f = np.asfortranarray(A)
+        if overwrite_b and B.flags.f_contiguous:
+            B_copy = B
+        else:
+            B_copy = _copy_to_fortran_order_even_if_1d(B)
+        if B_is_1d:
+            B_copy = np.expand_dims(B_copy, -1)
+        NRHS = 1 if B_is_1d else int(B_copy.shape[-1])
+        UPLO = val_to_int_ptr(ord("L") if lower else ord("U"))
+        TRANS = val_to_int_ptr(_trans_char_to_int(trans))
+        DIAG = val_to_int_ptr(ord("U") if unit_diagonal else ord("N"))
+        N = val_to_int_ptr(_N)
+        NRHS = val_to_int_ptr(NRHS)
+        LDA = val_to_int_ptr(_N)
+        LDB = val_to_int_ptr(_N)
+        INFO = val_to_int_ptr(0)
+        numba_trtrs(
+            UPLO,
+            TRANS,
+            DIAG,
+            N,
+            NRHS,
+            A_f.view(w_type).ctypes,
+            LDA,
+            B_copy.view(w_type).ctypes,
+            LDB,
+            INFO,
+        )
+        _solve_check(int_ptr_to_val(LDA), int_ptr_to_val(INFO))
+        if B_is_1d:
+            return B_copy[..., 0]
+        return B_copy
+    return impl
--- a/pytensor/link/numba/dispatch/linalg/solve/utils.py
+++ b/pytensor/link/numba/dispatch/linalg/solve/utils.py
+from scipy import linalg
+from pytensor.link.numba.dispatch import basic as numba_basic
+@numba_basic.numba_njit(inline="always")
+def _solve_check_input_shapes(A, B):
+    if A.shape[0] != B.shape[0]:
+        raise linalg.LinAlgError("Dimensions of A and B do not conform")
+    if A.shape[-2] != A.shape[-1]:
+        raise linalg.LinAlgError("Last 2 dimensions of A must be square")
--- a/pytensor/link/numba/dispatch/linalg/utils.py
+++ b/pytensor/link/numba/dispatch/linalg/utils.py
+from collections.abc import Callable
+import numba
+from numba.core import types
+from numba.core.extending import overload
+from numba.np.linalg import _copy_to_fortran_order, ensure_lapack
+from numpy.linalg import LinAlgError
+from pytensor.link.numba.dispatch import basic as numba_basic
+from pytensor.link.numba.dispatch.linalg._LAPACK import (
+    _LAPACK,
+    _get_underlying_float,
+    val_to_int_ptr,
+)
+@numba_basic.numba_njit(inline="always")
+def _copy_to_fortran_order_even_if_1d(x):
+    # Numba's _copy_to_fortran_order doesn't do anything for vectors
+    return x.copy() if x.ndim == 1 else _copy_to_fortran_order(x)
+@numba_basic.numba_njit(inline="always")
+def _trans_char_to_int(trans):
+    if trans not in [0, 1, 2]:
+        raise ValueError('Parameter "trans" should be one of 0, 1, 2')
+    if trans == 0:
+        return ord("N")
+    elif trans == 1:
+        return ord("T")
+    else:
+        return ord("C")
+def _check_scipy_linalg_matrix(a, func_name):
+    """
+    Adapted from https://github.com/numba/numba/blob/bd7ebcfd4b850208b627a3f75d4706000be36275/numba/np/linalg.py#L831
+    """
+    prefix = "scipy.linalg"
+    # Unpack optional type
+    if isinstance(a, types.Optional):
+        a = a.type
+    if not isinstance(a, types.Array):
+        msg = f"{prefix}.{func_name}() only supported for array types"
+        raise numba.TypingError(msg, highlighting=False)
+    if a.ndim not in [1, 2]:
+        msg = (
+            f"{prefix}.{func_name}() only supported on 1d or 2d arrays, found {a.ndim}."
+        )
+        raise numba.TypingError(msg, highlighting=False)
+    if not isinstance(a.dtype, types.Float | types.Complex):
+        msg = f"{prefix}.{func_name}() only supported on float and complex arrays."
+        raise numba.TypingError(msg, highlighting=False)
+@numba_basic.numba_njit(inline="always")
+def _solve_check(n, info, lamch=False, rcond=None):
+    """
+    Check arguments during the different steps of the solution phase
+    Adapted from https://github.com/scipy/scipy/blob/7f7f04caa4a55306a9c6613c89eef91fedbd72d4/scipy/linalg/_basic.py#L38
+    """
+    if info < 0:
+        # TODO: figure out how to do an fstring here
+        msg = "LAPACK reported an illegal value in input"
+        raise ValueError(msg)
+    elif 0 < info:
+        raise LinAlgError("Matrix is singular.")
+    if lamch:
+        E = _xlamch("E")
+        if rcond < E:
+            # TODO: This should be a warning, but we can't raise warnings in numba mode
+            print(  # noqa: T201
+                "Ill-conditioned matrix, rcond=", rcond, ", result may not be accurate."
+            )
+def _xlamch(kind: str = "E"):
+    """
+    Placeholder for getting machine precision; used by linalg.solve. Not used by pytensor to numbify graphs.
+    """
+    pass
+@overload(_xlamch)
+def xlamch_impl(kind: str = "E") -> Callable[[str], float]:
+    """
+    Compute the machine precision for a given floating point type.
+    """
+    from pytensor import config
+    ensure_lapack()
+    w_type = _get_underlying_float(config.floatX)
+    if w_type == "float32":
+        dtype = types.float32
+    elif w_type == "float64":
+        dtype = types.float64
+    else:
+        raise NotImplementedError("Unsupported dtype")
+    numba_lamch = _LAPACK().numba_xlamch(dtype)
+    def impl(kind: str = "E") -> float:
+        KIND = val_to_int_ptr(ord(kind))
+        return numba_lamch(KIND)  # type: ignore
+    return impl
--- a/pytensor/link/numba/dispatch/slinalg.py
+++ b/pytensor/link/numba/dispatch/slinalg.py
--- a/pytensor/tensor/slinalg.py
+++ b/pytensor/tensor/slinalg.py
@@ -566,7 +566,8 @@ class Solve(SolveBase):
        if 1 in allowed_inplace_inputs:
            # Give preference to overwrite_b
            new_props["overwrite_b"] = True
-        else:  # allowed inputs == [0]
+        # We can't overwrite_a if we're assuming tridiagonal
+        elif not self.assume_a == "tridiagonal":  # allowed inputs == [0]
            new_props["overwrite_a"] = True
        return type(self)(**new_props)

--- a/tests/link/numba/test_slinalg.py
+++ b/tests/link/numba/test_slinalg.py
@@ -12,6 +12,8 @@ from pytensor.tensor.slinalg import Cholesky, CholeskySolve, Solve, SolveTriangu
 from tests.link.numba.test_basic import compare_numba_and_py, numba_inplace_mode
+pytestmark = pytest.mark.filterwarnings("error")
 numba = pytest.importorskip("numba")
 floatX = config.floatX
@@ -22,7 +24,7 @@ rng = np.random.default_rng(42849)
 def test_lamch():
    from scipy.linalg import get_lapack_funcs
-    from pytensor.link.numba.dispatch.slinalg import _xlamch
+    from pytensor.link.numba.dispatch.linalg.utils import _xlamch
    @numba.njit()
    def xlamch(kind):
@@ -45,7 +47,7 @@ def test_xlange(ord_numba, ord_scipy):
    # xlange is called internally only, we don't dispatch pt.linalg.norm to it
    from scipy import linalg
-    from pytensor.link.numba.dispatch.slinalg import _xlange
+    from pytensor.link.numba.dispatch.linalg.solve.norm import _xlange
    @numba.njit()
    def xlange(x, ord):
@@ -60,7 +62,8 @@ def test_xgecon(ord_numba, ord_scipy):
    # gecon is called internally only, we don't dispatch pt.linalg.norm to it
    from scipy.linalg import get_lapack_funcs
-    from pytensor.link.numba.dispatch.slinalg import _xgecon, _xlange
+    from pytensor.link.numba.dispatch.linalg.solve.general import _xgecon
+    from pytensor.link.numba.dispatch.linalg.solve.norm import _xlange
    @numba.njit()
    def gecon(x, norm):