Commit 15637d23 authored by Maxim Kochurov, committed by Maxim Kochurov

remove deprecated MRG_RandomStream

Parent cc364dd9
......@@ -15,4 +15,3 @@
linalg
neighbours
rng_mrg
.. _libdoc_rng_mrg:
===================================================================
:mod:`sandbox.rng_mrg` -- MRG random number generator
===================================================================
.. module:: sandbox.rng_mrg
:platform: Unix, Windows
:synopsis: MRG random number generator
.. moduleauthor:: LISA
API
===
.. automodule:: pytensor.sandbox.rng_mrg
:members:
......@@ -1485,7 +1485,6 @@ class ProfileStats:
from pytensor import scalar as aes
from pytensor.tensor.elemwise import Elemwise
from pytensor.tensor.math import Dot
from pytensor.tensor.random.op import RandomVariable
scalar_op_amdlibm_no_speed_up = [
aes.LT,
......@@ -1628,18 +1627,7 @@ class ProfileStats:
printed_tip = True
# tip 5
for (fgraph, a) in self.apply_time:
node = a
if isinstance(node.op, RandomVariable):
printed_tip = True
print(
" - Replace the default random number generator by "
"'from pytensor.sandbox.rng_mrg import MRG_RandomStream "
"as RandomStream', as this is is faster. It is still "
"experimental, but seems to work correctly.",
file=file,
)
break
# The tip was about MRG_RandomStream which is removed
# tip 6
for (fgraph, a) in self.apply_time:
......
import copy
from typing import Tuple
import numpy as np
import pytensor.tensor as at
from pytensor.configdefaults import config
from pytensor.graph.basic import Apply
from pytensor.link.c.op import COp
from pytensor.scalar import ScalarType, as_scalar
from pytensor.tensor.type import discrete_dtypes
class MultinomialFromUniform(COp):
    """
    Converts samples from a uniform into sample from a multinomial.

    Parameters
    ----------
    odtype : str
        Output dtype, or "auto" to use the dtype of ``pvals``.
    """

    __props__: Tuple[str, ...] = ("odtype",)

    def __init__(self, odtype):
        self.odtype = odtype

    def __str__(self):
        return f"{self.__class__.__name__}{{{self.odtype}}}"

    def __setstate__(self, dct):
        # Support old pickles that predate the `odtype` attribute.
        self.__dict__.update(dct)
        try:
            self.odtype
        except AttributeError:
            self.odtype = "auto"

    def make_node(self, pvals, unis, n=1):
        """
        Build the Apply node.

        Parameters
        ----------
        pvals : 2-d tensor
            Rows of multinomial probabilities.
        unis : 1-d tensor
            Uniform samples; must have length ``pvals.shape[0] * n``.
        n : int
            Number of samples to draw per row.
        """
        pvals = at.as_tensor_variable(pvals)
        unis = at.as_tensor_variable(unis)
        if pvals.ndim != 2:
            raise NotImplementedError("pvals ndim should be 2", pvals.ndim)
        if unis.ndim != 1:
            raise NotImplementedError("unis ndim should be 1", unis.ndim)
        if self.odtype == "auto":
            odtype = pvals.dtype
        else:
            odtype = self.odtype
        out = at.tensor(
            dtype=odtype, shape=tuple(1 if s == 1 else None for s in pvals.type.shape)
        )
        return Apply(self, [pvals, unis, as_scalar(n)], [out])

    def grad(self, ins, outgrads):
        # Sampling is not differentiable: return zeros for every input.
        pvals, unis, n = ins
        (gz,) = outgrads
        return [
            at.zeros_like(x, dtype=config.floatX)
            if x.dtype in discrete_dtypes
            else at.zeros_like(x)
            for x in ins
        ]

    def c_code_cache_version(self):
        # Bumped from (8,): the "unis ndim" error message in the C code
        # was corrected.
        return (9,)

    def c_code(self, node, name, ins, outs, sub):
        # support old pickled graphs
        if len(ins) == 2:
            (pvals, unis) = ins
            n = 1
        else:
            (pvals, unis, n) = ins
        (z,) = outs
        if self.odtype == "auto":
            t = f"PyArray_TYPE({pvals})"
        else:
            t = ScalarType(self.odtype).dtype_specs()[1]
            if t.startswith("pytensor_complex"):
                t = t.replace("pytensor_complex", "NPY_COMPLEX")
            else:
                t = t.upper()
        fail = sub["fail"]
        return (
            """
        if (PyArray_NDIM(%(pvals)s) != 2)
        {
            PyErr_Format(PyExc_TypeError, "pvals ndim should be 2");
            %(fail)s;
        }
        if (PyArray_NDIM(%(unis)s) != 1)
        {
            PyErr_Format(PyExc_TypeError, "unis ndim should be 1");
            %(fail)s;
        }
        if (PyArray_DIMS(%(unis)s)[0] != (PyArray_DIMS(%(pvals)s)[0] * %(n)s))
        {
            PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0] * n");
            %(fail)s;
        }
        if ((NULL == %(z)s)
            || ((PyArray_DIMS(%(z)s))[0] != (PyArray_DIMS(%(pvals)s))[0])
            || ((PyArray_DIMS(%(z)s))[1] != (PyArray_DIMS(%(pvals)s))[1])
        )
        {
            Py_XDECREF(%(z)s);
            %(z)s = (PyArrayObject*) PyArray_EMPTY(2,
                PyArray_DIMS(%(pvals)s),
                %(t)s,
                0);
            if (!%(z)s)
            {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc z output");
                %(fail)s;
            }
        }
        { // NESTED SCOPE
        const int nb_multi = PyArray_DIMS(%(pvals)s)[0];
        const int nb_outcomes = PyArray_DIMS(%(pvals)s)[1];
        const int n_samples = %(n)s;
        //
        // For each multinomial, loop over each possible outcome
        //
        for (int c = 0; c < n_samples; ++c){
            for (int n = 0; n < nb_multi; ++n)
            {
                int waiting = 1;
                double cummul = 0.;
                const dtype_%(unis)s* unis_n = (dtype_%(unis)s*)PyArray_GETPTR1(%(unis)s, c*nb_multi + n);
                for (int m = 0; m < nb_outcomes; ++m)
                {
                    dtype_%(z)s* z_nm = (dtype_%(z)s*)PyArray_GETPTR2(%(z)s, n,m);
                    const dtype_%(pvals)s* pvals_nm = (dtype_%(pvals)s*)PyArray_GETPTR2(%(pvals)s, n,m);
                    cummul += *pvals_nm;
                    if (c == 0)
                    {
                        if (waiting && (cummul > *unis_n))
                        {
                            *z_nm = 1.;
                            waiting = 0;
                        }
                        else
                        {
                            // if we re-used old z pointer, we have to clear it out.
                            *z_nm = 0.;
                        }
                    }
                    else {
                        if (cummul > *unis_n)
                        {
                            *z_nm = *z_nm + 1.;
                            break;
                        }
                    }
                }
            }
        }
        } // END NESTED SCOPE
        """
            % locals()
        )

    def perform(self, node, ins, outs):
        """Python fallback: count, per row, how many uniforms fall in each bin."""
        # support old pickled graphs
        if len(ins) == 2:
            (pvals, unis) = ins
            n_samples = 1
        else:
            (pvals, unis, n_samples) = ins
        (z,) = outs
        if unis.shape[0] != pvals.shape[0] * n_samples:
            raise ValueError(
                "unis.shape[0] != pvals.shape[0] * n_samples",
                unis.shape[0],
                pvals.shape[0],
                n_samples,
            )
        if z[0] is None or z[0].shape != pvals.shape:
            z[0] = np.zeros(pvals.shape, dtype=node.outputs[0].dtype)
        else:
            z[0].fill(0)
        nb_multi = pvals.shape[0]
        # Original version that is not vectorized. I keep it here as
        # it is more readable.
        # For each multinomial, loop over each possible outcome
        # nb_outcomes = pvals.shape[1]
        # for c in range(n_samples):
        #    for n in range(nb_multi):
        #        waiting = True
        #        cummul = 0
        #        unis_n = unis[c * nb_multi + n]
        #        for m in range(nb_outcomes):
        #            cummul += pvals[n, m]
        #            if c == 0:
        #                if (waiting and (cummul > unis_n)):
        #                    z[0][n, m] = 1
        #                    waiting = False
        #                else:
        #                    # Only needed if we don't init the output to 0
        #                    z[0][n, m] = 0
        #            else:
        #                if (cummul > unis_n):
        #                    z[0][n, m] += 1
        #                    break
        # Vectorized version that is much faster as all the looping is
        # done in C even if this make extra work.
        for c in range(n_samples):
            for n in range(nb_multi):
                unis_n = unis[c * nb_multi + n]
                # The dtype='float64' is important. Otherwise we don't
                # have the same answer as the c code as in the c code
                # the cumul is in double precision.
                cumsum = pvals[n].cumsum(dtype="float64")
                z[0][n, np.searchsorted(cumsum, unis_n)] += 1
class ChoiceFromUniform(MultinomialFromUniform):
    """
    Converts samples from a uniform into sample (without replacement) from a
    multinomial.

    Parameters
    ----------
    odtype : str
        Output dtype, or "auto" for int64 (outputs are outcome indices).
    replace : bool
        If True, sample with replacement.
    """

    __props__ = (
        "odtype",
        "replace",
    )

    def __init__(self, odtype, replace=False, *args, **kwargs):
        self.replace = replace
        super().__init__(odtype=odtype, *args, **kwargs)

    def __setstate__(self, state):
        # Support old pickles that predate the `replace` attribute.
        self.__dict__.update(state)
        if "replace" not in state:
            self.replace = False

    def make_node(self, pvals, unis, n=1):
        pvals = at.as_tensor_variable(pvals)
        unis = at.as_tensor_variable(unis)
        if pvals.ndim != 2:
            raise NotImplementedError("pvals ndim should be 2", pvals.ndim)
        if unis.ndim != 1:
            raise NotImplementedError("unis ndim should be 1", unis.ndim)
        if self.odtype == "auto":
            odtype = "int64"
        else:
            odtype = self.odtype
        out = at.tensor(dtype=odtype, shape=pvals.type.shape)
        return Apply(self, [pvals, unis, as_scalar(n)], [out])

    def c_code_cache_version(self):
        # Bumped from (1,): the "unis ndim" error message in the C code
        # was corrected.
        return (2,)

    def c_code(self, node, name, ins, outs, sub):
        (pvals, unis, n) = ins
        (z,) = outs
        replace = int(self.replace)
        if self.odtype == "auto":
            t = "NPY_INT64"
        else:
            t = ScalarType(self.odtype).dtype_specs()[1]
            if t.startswith("pytensor_complex"):
                t = t.replace("pytensor_complex", "NPY_COMPLEX")
            else:
                t = t.upper()
        fail = sub["fail"]
        return (
            """
        // create a copy of pvals matrix
        PyArrayObject* pvals_copy = NULL;
        if (PyArray_NDIM(%(pvals)s) != 2)
        {
            PyErr_Format(PyExc_TypeError, "pvals ndim should be 2");
            %(fail)s;
        }
        if (PyArray_NDIM(%(unis)s) != 1)
        {
            PyErr_Format(PyExc_TypeError, "unis ndim should be 1");
            %(fail)s;
        }
        if ( %(n)s > (PyArray_DIMS(%(pvals)s)[1]) )
        {
            PyErr_Format(PyExc_ValueError, "Cannot sample without replacement n samples bigger than the size of the distribution.");
            %(fail)s;
        }
        if (PyArray_DIMS(%(unis)s)[0] != (PyArray_DIMS(%(pvals)s)[0] * %(n)s))
        {
            PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0] * n");
            %(fail)s;
        }
        pvals_copy = (PyArrayObject*) PyArray_EMPTY(2,
            PyArray_DIMS(%(pvals)s),
            PyArray_TYPE(%(pvals)s),
            0);
        if (!pvals_copy)
        {
            PyErr_SetString(PyExc_MemoryError, "failed to alloc pvals_copy");
            %(fail)s;
        }
        PyArray_CopyInto(pvals_copy, %(pvals)s);
        if ((NULL == %(z)s)
            || ((PyArray_DIMS(%(z)s))[0] != (PyArray_DIMS(%(pvals)s))[0])
            || ((PyArray_DIMS(%(z)s))[1] != %(n)s)
        )
        {
            Py_XDECREF(%(z)s);
            npy_intp dims[2];
            dims[0] = PyArray_DIMS(%(pvals)s)[0];
            dims[1] = %(n)s;
            %(z)s = (PyArrayObject*) PyArray_EMPTY(2,
                dims,
                %(t)s,
                -1);
            if (!%(z)s)
            {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc z output");
                %(fail)s;
            }
        }
        { // NESTED SCOPE
        const int nb_multi = PyArray_DIMS(%(pvals)s)[0];
        const int nb_outcomes = PyArray_DIMS(%(pvals)s)[1];
        const int n_samples = %(n)s;
        //
        // For each multinomial, loop over each possible outcome,
        // and set selected pval to 0 after being selected
        //
        for (int c = 0; c < n_samples; ++c){
            for (int n = 0; n < nb_multi; ++n)
            {
                double cummul = 0.;
                const dtype_%(unis)s* unis_n = (dtype_%(unis)s*)PyArray_GETPTR1(%(unis)s, c*nb_multi + n);
                dtype_%(z)s* z_nc = (dtype_%(z)s*)PyArray_GETPTR2(%(z)s, n, c);
                for (int m = 0; m < nb_outcomes; ++m)
                {
                    dtype_%(pvals)s* pvals_nm = (dtype_%(pvals)s*)PyArray_GETPTR2(pvals_copy, n, m);
                    cummul += *pvals_nm;
                    if (cummul > *unis_n)
                    {
                        *z_nc = m;
                        // No need to renormalize after the last samples.
                        if (c == (n_samples - 1))
                            break;
                        if (! %(replace)s )
                        {
                            // renormalize the nth row of pvals, reuse (cummul-*pvals_nm) to initialize the sum
                            dtype_%(pvals)s sum = cummul - *pvals_nm;
                            dtype_%(pvals)s* pvals_n = (dtype_%(pvals)s*)PyArray_GETPTR2(pvals_copy, n, m);
                            *pvals_nm = 0.;
                            for (int k = m; k < nb_outcomes; ++k)
                            {
                                sum = sum + *pvals_n;
                                pvals_n++;
                            }
                            pvals_n = (dtype_%(pvals)s*)PyArray_GETPTR2(pvals_copy, n, 0);
                            for (int k = 0; k < nb_outcomes; ++k)
                            {
                                *pvals_n = *pvals_n / sum;
                                pvals_n++;
                            }
                        }
                        break;
                    }
                }
            }
        }
        // delete pvals_copy
        {
            Py_XDECREF(pvals_copy);
        }
        } // END NESTED SCOPE
        """
            % locals()
        )

    def perform(self, node, ins, outs):
        """Python fallback: draw `n_samples` outcome indices per row."""
        (pvals, unis, n_samples) = ins
        # make a copy so we do not overwrite the input
        pvals = copy.copy(pvals)
        (z,) = outs
        if n_samples > pvals.shape[1]:
            raise ValueError(
                "Cannot sample without replacement n samples "
                "bigger than the size of the distribution."
            )
        if unis.shape[0] != pvals.shape[0] * n_samples:
            raise ValueError(
                "unis.shape[0] != pvals.shape[0] * n_samples",
                unis.shape[0],
                pvals.shape[0],
                n_samples,
            )
        if self.odtype == "auto":
            odtype = "int64"
        else:
            odtype = self.odtype
        # Fixed: the old comparison `z[0].shape == [a, b]` compared a tuple to
        # a list, which is always False, so the output buffer was reallocated
        # on every call. Compare against a tuple and reset the buffer to the
        # -1 sentinel when it can be reused.
        if z[0] is None or z[0].shape != (pvals.shape[0], n_samples):
            z[0] = -1 * np.ones((pvals.shape[0], n_samples), dtype=odtype)
        else:
            z[0].fill(-1)
        nb_multi = pvals.shape[0]
        nb_outcomes = pvals.shape[1]
        # For each multinomial, loop over each possible outcome,
        # and set selected pval to 0 after being selected
        for c in range(n_samples):
            for n in range(nb_multi):
                cummul = 0
                unis_n = unis[c * nb_multi + n]
                for m in range(nb_outcomes):
                    cummul += pvals[n, m]
                    if cummul > unis_n:
                        z[0][n, c] = m
                        # set to zero and re-normalize so that it's not
                        # selected again
                        if not self.replace:
                            pvals[n, m] = 0.0
                            pvals[n] /= pvals[n].sum()
                        break
"""
Implementation of MRG31k3p random number generator for PyTensor.
Generator code in SSJ package (L'Ecuyer & Simard).
http://www.iro.umontreal.ca/~simardr/ssj/indexe.html
The MRG31k3p algorithm was published in:
P. L'Ecuyer and R. Touzin, Fast Combined Multiple Recursive Generators with Multipliers of the form a = +/- 2^d +/- 2^e, Proceedings of the 2000 Winter Simulation Conference, Dec. 2000, 683-689.
The conception of the multi-stream from MRG31k3p was published in:
P. L'Ecuyer and R. Simard and E. Jack Chen and W. David Kelton, An Object-Oriented Random-Number Package with Many Long Streams and Substreams, Operations Research, volume 50, number 6, 2002, 1073-1075.
"""
import warnings
import numpy as np
from pytensor import function, gradient
from pytensor import scalar as aes
from pytensor import shared
from pytensor import tensor as at
from pytensor.compile import optdb
from pytensor.configdefaults import config
from pytensor.gradient import undefined_grad
from pytensor.graph.basic import Apply, Constant, Variable
from pytensor.graph.rewriting.basic import in2out, node_rewriter
from pytensor.link.c.op import COp, Op
from pytensor.link.c.params_type import ParamsType
from pytensor.sandbox import multinomial
from pytensor.scalar import bool as bool_t
from pytensor.scalar import int32 as int_t
from pytensor.tensor import as_tensor_variable, cast, get_vector_length
from pytensor.tensor.math import cos, log, prod, sin, sqrt
from pytensor.tensor.shape import reshape
from pytensor.tensor.type import TensorType, iscalar, ivector, lmatrix
# Importing this module is itself deprecated: warn once at import time so
# downstream users migrate to `pytensor.tensor.random`.
warnings.warn(
    "The module `pytensor.sandbox.rng_mrg` is deprecated. "
    "Use the module `pytensor.tensor.random` for random variables instead.",
    DeprecationWarning,
    stacklevel=2,
)
def matVecModM(A, s, m):
    """
    Compute the matrix-vector product ``A @ s`` modulo ``m``.

    Parameters
    ----------
    A : ndarray of int64
        The matrix; int64 is required so intermediate products do not overflow.
    s : 1-d integer ndarray
        The vector.
    m : int
        The modulus.

    Returns
    -------
    ndarray of int32
        ``(A @ s) % m``, reduced elementwise before and after the row sum.
    """
    assert A.dtype == "int64"
    # Reduce each product modulo m before summing so the int64 row sums
    # cannot overflow, then reduce the sums once more.
    return np.int32(np.sum((A * s) % m, 1) % m)
def multMatVect(v, A, m1, B, m2):
    """
    Multiply the first half of v by A with a modulo of m1 and the second half
    by B with a modulo of m2.

    Parameters
    ----------
    v : sequence of 6 integers
        State vector: ``v[:3]`` is multiplied by `A` (mod `m1`), ``v[3:]``
        by `B` (mod `m2`).
    A, B : int64 matrices
    m1, m2 : int
        The two moduli.

    Returns
    -------
    The concatenation of the two matrix-vector products, as produced by
    the compiled `DotModulo` graph.

    Notes
    -----
    The parameters of dot_modulo are passed implicitly because passing them
    explicitly takes more time than running the function's C-code.
    """
    # Compile the DotModulo graph once and cache it on the function object.
    if multMatVect.dot_modulo is None:
        A_sym = lmatrix("A")
        s_sym = ivector("s")
        m_sym = iscalar("m")
        A2_sym = lmatrix("A2")
        s2_sym = ivector("s2")
        m2_sym = iscalar("m2")
        o = DotModulo()(A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym)
        multMatVect.dot_modulo = function(
            [A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym], o, profile=False
        )
    # This way of calling the PyTensor fct is done to bypass PyTensor overhead.
    # Inputs are poked directly into the function's storage, then the VM is
    # run and the result read back out; the storage order must match the
    # input list above.
    f = multMatVect.dot_modulo
    f.input_storage[0].storage[0] = A
    f.input_storage[1].storage[0] = v[:3]
    f.input_storage[2].storage[0] = m1
    f.input_storage[3].storage[0] = B
    f.input_storage[4].storage[0] = v[3:]
    f.input_storage[5].storage[0] = m2
    f.vm()
    r = f.output_storage[0].storage[0]
    return r


# Cache slot for the compiled graph built lazily inside multMatVect.
multMatVect.dot_modulo = None
class DotModulo(COp):
    """
    Efficient and numerically stable implementation of a dot product followed
    by a modulo operation. This performs the same function as matVecModM.

    We do this 2 times on 2 triple inputs and concatenating the output.
    """

    __props__ = ()

    def make_node(self, A, s, m, A2, s2, m2):
        # Output has the same type as `s` (an integer vector).
        return Apply(self, [A, s, m, A2, s2, m2], [s.type()])

    def perform(self, node, inputs, outputs):
        # Python fallback: run matVecModM on each (matrix, vector, modulus)
        # triple and concatenate the two result vectors.
        (A, s, m, A2, s2, m2) = inputs
        (out,) = outputs
        o1 = matVecModM(A, s, m)
        o2 = matVecModM(A2, s2, m2)
        out[0] = np.concatenate((o1, o2))

    def c_code_cache_version(self):
        return (6,)

    def c_code(self, node, name, inputs, outputs, sub):
        (_A, _s, _m, _A2, _s2, _m2) = inputs
        (_z,) = outputs
        return """
        int osize = -1;
        if (PyArray_NDIM(%(_A)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(A) != 2"); %(fail)s;}
        if (PyArray_NDIM(%(_s)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(v) != 1"); %(fail)s;}
        if (PyArray_NDIM(%(_m)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(m) != 0"); %(fail)s;}
        if (PyArray_NDIM(%(_A2)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(A2) != 2"); %(fail)s;}
        if (PyArray_NDIM(%(_s2)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(v2) != 1"); %(fail)s;}
        if (PyArray_NDIM(%(_m2)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(m2) != 0"); %(fail)s;}
        if( PyArray_DIMS(%(_A)s)[1] != PyArray_DIMS(%(_s)s)[0])
        {PyErr_SetString(PyExc_NotImplementedError, "A and s shapes don't agree."); %(fail)s;}
        if( PyArray_DIMS(%(_A2)s)[1] != PyArray_DIMS(%(_s2)s)[0])
        {PyErr_SetString(PyExc_NotImplementedError, "A2 and s2 shapes don't agree."); %(fail)s;}
        osize = PyArray_DIMS(%(_A)s)[0] + PyArray_DIMS(%(_A2)s)[0];
        if (!%(_z)s
            || (PyArray_DIMS(%(_z)s)[0] != osize))
        {
            {Py_XDECREF(%(_z)s);}
            npy_intp dims[] = {0,};
            dims[0] = osize;
            %(_z)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, PyArray_TYPE(%(_s)s));
        }
        if(!%(_z)s){%(fail)s;}
        { //makes it compile even though labels jump over variable definitions.
            // A has size MxN, s has N, output M
            npy_intp M = PyArray_DIMS(%(_A)s)[0];
            npy_intp N = PyArray_DIMS(%(_A)s)[1];
            const dtype_%(_A)s* __restrict__ DA = (dtype_%(_A)s*)PyArray_DATA(%(_A)s);
            dtype_%(_s)s* __restrict__ Ds = (dtype_%(_s)s*)PyArray_DATA(%(_s)s);
            dtype_%(_z)s* __restrict__ Dz = (dtype_%(_z)s*)PyArray_DATA(%(_z)s);
            const dtype_%(_m)s m = ((dtype_%(_m)s*)PyArray_DATA(%(_m)s))[0];
            npy_intp SA = PyArray_STRIDES(%(_A)s)[1] / PyArray_DESCR(%(_A)s)->elsize;
            npy_intp Ss = PyArray_STRIDES(%(_s)s)[0] / PyArray_DESCR(%(_s)s)->elsize;
            npy_intp Sz = PyArray_STRIDES(%(_z)s)[0] / PyArray_DESCR(%(_z)s)->elsize;
            for (npy_int32 i = 0; i < M; ++i)
            {
                const dtype_%(_A)s* __restrict__ Ak = (dtype_%(_A)s*)(PyArray_BYTES(%(_A)s) + PyArray_STRIDES(%(_A)s)[0] * i);
                npy_int64 r = 0;
                for (npy_int32 j = 0; j < N; ++j)
                {
                    r += (npy_int64)(Ds[j * Ss] * (npy_int64)(Ak[j * SA])) %% m;
                }
                Dz[i * Sz] = r %% m;
            }
        }
        //redo it with the second triple of inputs
        {
            // A has size MxN, s has N, output M
            npy_intp M = PyArray_DIMS(%(_A2)s)[0];
            npy_intp N = PyArray_DIMS(%(_A2)s)[1];
            const dtype_%(_A2)s* __restrict__ DA = (dtype_%(_A2)s*)PyArray_DATA(%(_A2)s);
            dtype_%(_s2)s* __restrict__ Ds = (dtype_%(_s2)s*)PyArray_DATA(%(_s2)s);
            const dtype_%(_m2)s m = ((dtype_%(_m2)s*)PyArray_DATA(%(_m2)s))[0];
            npy_intp SA = PyArray_STRIDES(%(_A2)s)[1] / PyArray_DESCR(%(_A2)s)->elsize;
            npy_intp Ss = PyArray_STRIDES(%(_s2)s)[0] / PyArray_DESCR(%(_s2)s)->elsize;
            npy_intp Sz = PyArray_STRIDES(%(_z)s)[0] / PyArray_DESCR(%(_z)s)->elsize;
            dtype_%(_z)s* __restrict__ Dz = (dtype_%(_z)s*)PyArray_DATA(%(_z)s) + PyArray_DIMS(%(_A)s)[0] * Sz;
            for (npy_int32 i = 0; i < M; ++i)
            {
                const dtype_%(_A2)s* __restrict__ Ak = (dtype_%(_A2)s*)(PyArray_BYTES(%(_A2)s) + PyArray_STRIDES(%(_A2)s)[0] * i);
                npy_int64 r = 0;
                for (npy_int32 j = 0; j < N; ++j)
                {
                    r += (npy_int64)(Ds[j * Ss] * (npy_int64)(Ak[j * SA])) %% m;
                }
                Dz[i * Sz] = r %% m;
            }
        }
        """ % dict(
            locals(), **sub
        )
# MRG31k3p
# generator constants :
M1 = np.asarray(np.int32(2147483647))  # 2^31 - 1
M2 = np.asarray(np.int32(2147462579))  # 2^31 - 21069
MASK12 = np.int32(511)  # 2^9 - 1
MASK13 = np.int32(16777215)  # 2^24 - 1
MASK2 = np.int32(65535)  # 2^16 - 1
MULT2 = np.int32(21069)
NORM = 4.656612873077392578125e-10  # 1./2^31

# One-step transition matrices, kept for reference only:
# A1p0 = np.asarray([[0, 4194304, 129], [1, 0, 0], [0, 1, 0]],
#                   dtype='int64')
# A2p0 = np.asarray([[32768, 0, 32769], [1, 0, 0], [0, 1, 0]],
#                   dtype='int64')

# Transition matrices used for stream jumping; presumably the one-step
# matrices raised to the powers 2^72 / 2^134 (see ff_2p72 / ff_2p134).
A1p72 = np.asarray(
    [
        [1516919229, 758510237, 499121365],
        [1884998244, 1516919229, 335398200],
        [601897748, 1884998244, 358115744],
    ],
    dtype="int64",
)
A2p72 = np.asarray(
    [
        [1228857673, 1496414766, 954677935],
        [1133297478, 1407477216, 1496414766],
        [2002613992, 1639496704, 1407477216],
    ],
    dtype="int64",
)
A1p134 = np.asarray(
    [
        [1702500920, 1849582496, 1656874625],
        [828554832, 1702500920, 1512419905],
        [1143731069, 828554832, 102237247],
    ],
    dtype="int64",
)
A2p134 = np.asarray(
    [
        [796789021, 1464208080, 607337906],
        [1241679051, 1431130166, 1464208080],
        [1401213391, 1178684362, 1431130166],
    ],
    dtype="int64",
)

# Pre-built int32 shift amounts reused by mrg_next_value.
np_int32_vals = [np.int32(i) for i in (0, 7, 9, 15, 16, 22, 24)]
def ff_2p134(rstate):
    """
    Fast-forward: return `rstate` advanced 2**134 steps via the precomputed
    transition matrices A1p134 / A2p134.
    """
    return multMatVect(rstate, A1p134, M1, A2p134, M2)
def ff_2p72(rstate):
    """
    Fast-forward: return `rstate` advanced 2**72 steps via the precomputed
    transition matrices A1p72 / A2p72.
    """
    return multMatVect(rstate, A1p72, M1, A2p72, M2)
def mrg_next_value(rstate, new_rstate, NORM, mask, offset):
    """
    Advance one MRG31k3p state by one step and return a sample in [0, 1).

    Parameters
    ----------
    rstate : array-like of 6 np.int32
        Current state (x11, x12, x13, x21, x22, x23).
    new_rstate : ndarray of int32
        Written in place with the advanced state.
    NORM : float
        Scaling constant mapping the integer output to [0, 1).
    mask, offset : int
        Dtype-specific post-processing of the integer difference
        (see mrg_uniform.perform for the values used per float dtype).

    Notes
    -----
    The arithmetic below deliberately relies on np.int32 wraparound;
    callers are expected to silence NumPy overflow warnings.
    """
    x11, x12, x13, x21, x22, x23 = rstate
    assert isinstance(x11, np.int32)
    i0, i7, i9, i15, i16, i22, i24 = np_int32_vals
    # first component
    y1 = ((x12 & MASK12) << i22) + (x12 >> i9) + ((x13 & MASK13) << i7) + (x13 >> i24)
    assert isinstance(y1, np.int32)
    if y1 < 0 or y1 >= M1:  # must also check overflow
        y1 -= M1
    y1 += x13
    if y1 < 0 or y1 >= M1:
        y1 -= M1
    # Shift the first component's history.
    x13 = x12
    x12 = x11
    x11 = y1
    # second component
    y1 = ((x21 & MASK2) << i15) + (MULT2 * (x21 >> i16))
    assert isinstance(y1, np.int32)
    if y1 < 0 or y1 >= M2:
        y1 -= M2
    y2 = ((x23 & MASK2) << i15) + (MULT2 * (x23 >> i16))
    assert isinstance(y2, np.int32)
    if y2 < 0 or y2 >= M2:
        y2 -= M2
    y2 += x23
    if y2 < 0 or y2 >= M2:
        y2 -= M2
    y2 += y1
    if y2 < 0 or y2 >= M2:
        y2 -= M2
    # Shift the second component's history.
    x23 = x22
    x22 = x21
    x21 = y2
    # Must never return either 0 or M1+1
    new_rstate[...] = [x11, x12, x13, x21, x22, x23]
    assert new_rstate.dtype == np.int32
    if x11 <= x21:
        return (((x11 - x21 + M1) & mask) + offset) * NORM
    else:
        return (((x11 - x21) & mask) + offset) * NORM
class mrg_uniform_base(Op):
    """
    Base class for MRG31k3p uniform-sampling ops.

    Parameters
    ----------
    output_type : TensorType
        Type (dtype and ndim) of the sample output.
    inplace : bool
        If True, the rstate input is updated in place (destroy_map set).
    """

    __props__ = ("output_type", "inplace")
    params_type = ParamsType(
        inplace=bool_t,
        # following params will come from self.output_type.
        # NB: As output object may not be allocated in C code,
        # we can not be sure to get these properties from output.
        # So, we should better get them as params from self.output_type.
        ndim=int_t,
        otypenum=int_t,
        otype_is_float32=bool_t,
    )

    def __init__(self, output_type, inplace=False):
        Op.__init__(self)
        self.output_type = output_type
        self.inplace = inplace
        if inplace:
            self.destroy_map = {0: [0]}
        self.warned_numpy_version = False

    # These attributes (used as params) are created as properties
    # to make them available even for old pickled objects, e.g.
    # when testing old interface or when using FAST_COMPILE mode.
    ndim = property(lambda self: self.output_type.ndim)
    otypenum = property(lambda self: np.dtype(self.output_type.dtype).num)
    otype_is_float32 = property(lambda self: self.output_type.dtype == "float32")

    def __str__(self):
        if self.inplace:
            s = "inplace"
        else:
            s = "no_inplace"
        return self.__class__.__name__ + f"{{{self.output_type},{s}}}"

    def grad(self, inputs, ograd):
        # Sampling is not differentiable.
        return [
            gradient.grad_undefined(
                self, k, inp, "No gradient defined through random sampling op"
            )
            for k, inp in enumerate(inputs)
        ]

    def R_op(self, inputs, eval_points):
        return [None for i in eval_points]
class mrg_uniform(COp, mrg_uniform_base):
    # CPU VERSION
    #
    # Samples uniforms in [0, 1) from a matrix of MRG31k3p stream states.
    # Inputs: rstate (n_streams x 6 int32 matrix), size (shape vector).
    # Outputs: the updated rstate and the sample tensor.
    _f16_ok = True

    def make_node(self, rstate, size):
        # error checking slightly redundant here, since
        # this op should not be called directly.
        #
        # call through MRG_RandomStream instead.
        out_shape = ()
        for i in range(self.output_type.ndim):
            if at.extract_constant(size[i]) == 1:
                out_shape += (1,)
            else:
                out_shape += (None,)
        output_var = self.output_type.clone(shape=out_shape)()
        rstate = as_tensor_variable(rstate)
        size = as_tensor_variable(size)
        return Apply(self, [rstate, size], [rstate.type(), output_var])

    @classmethod
    def new(cls, rstate, ndim, dtype, size):
        # Convenience constructor: infer ndim from `size` when not given.
        v_size = as_tensor_variable(size)
        if ndim is None:
            ndim = get_vector_length(v_size)
        op = cls(TensorType(dtype, shape=(None,) * ndim))
        return op(rstate, v_size)

    def perform(self, node, inp, out, params):
        # Python fallback mirroring the C implementation below.
        rstate, size = inp
        o_rstate, o_sample = out
        n_elements = 1
        for s in size:
            n_elements *= s
        if n_elements > M1:
            # The limit is on the C code. This perform don't
            # have this limit.  But to have all of them behave the
            # same (and have DebugMode don't use too much memory for
            # some rng_mrg tests) I also add this limit here.
            raise ValueError("rng_mrg does not support more then (2**31 -1) samples")

        rstate = np.asarray(rstate)  # bring state from XXX if necessary
        if not self.inplace:
            rstate = rstate.copy()

        n_streams, _ = rstate.shape

        rval = np.zeros(n_elements, dtype=self.output_type.dtype)

        # Dtype-specific constants fed to mrg_next_value.
        # NOTE(review): dtypes other than float16/32/64 would leave
        # mask/offset/NORM unbound and raise UnboundLocalError below.
        if rval.dtype == "float16":
            mask = 0x7FFF
            offset = 1
            NORM = np.float16(3.0458e-05)
        elif rval.dtype == "float32":
            mask = 0xFFFFFFFF
            offset = 0
            NORM = np.float32(4.6566126e-10)
        elif rval.dtype == "float64":
            mask = 0xFFFFFFFF
            offset = 0
            NORM = 4.656612873077392578125e-10  # 1./2^31

        # mrg_next_value relies on int32 wraparound: ignore overflow warnings.
        err_orig = np.seterr(over="ignore")
        try:
            for i in range(n_elements):
                sample = mrg_next_value(
                    rstate[i % n_streams],
                    rstate[i % n_streams],
                    NORM=NORM,
                    mask=mask,
                    offset=offset,
                )
                rval[i] = sample
        finally:
            np.seterr(**err_orig)

        # send to GPU if necessary
        o_rstate[0] = node.outputs[0].type.filter(rstate)
        o_sample[0] = node.outputs[1].type.filter(rval.reshape(size))

    def c_support_code(self, **kwargs):
        # Emit one C helper per supported float dtype; each advances every
        # requested element round-robin over the streams.
        return "\n".join(
            """
        void cpu_rng_mrg_uniform_%(dtype)s(PyArrayObject* o_sample, PyArrayObject* o_rstate,
                                           npy_int64 n_elements, int n_streams) {
            const npy_int32 i0 = 0;
            const npy_int32 i7 = 7;
            const npy_int32 i9 = 9;
            const npy_int32 i15 = 15;
            const npy_int32 i16 = 16;
            const npy_int32 i22 = 22;
            const npy_int32 i24 = 24;
            const npy_int32 M1 = 2147483647;      //2^31 - 1
            const npy_int32 M2 = 2147462579;      //2^31 - 21069
            const npy_int32 MASK12 = 511;         //2^9 - 1
            const npy_int32 MASK13 = 16777215;    //2^24 - 1
            const npy_int32 MASK2 = 65535;        //2^16 - 1
            const npy_int32 MULT2 = 21069;

            %(dtype)s* sample_data = (%(dtype)s *) PyArray_DATA(o_sample);
            npy_int32* state_data = (npy_int32 *) PyArray_DATA(o_rstate);
            for (int i = 0; i < n_elements; ++i)
            {
                npy_int32 * state_data_i = state_data + (i%%n_streams)*6;
                npy_int32 y1, y2, x11, x12, x13, x21, x22, x23;

                x11 = state_data_i[0];
                x12 = state_data_i[1];
                x13 = state_data_i[2];
                x21 = state_data_i[3];
                x22 = state_data_i[4];
                x23 = state_data_i[5];

                y1 = ((x12 & MASK12) << i22) + (x12 >> i9) + ((x13 & MASK13) << i7) + (x13 >> i24);
                if ((y1 < 0 || y1 >= M1))     //must also check overflow
                    y1 -= M1;
                y1 += x13;
                if ((y1 < 0 or y1 >= M1))
                    y1 -= M1;
                x13 = x12;
                x12 = x11;
                x11 = y1;

                y1 = ((x21 & MASK2) << i15) + (MULT2 * (x21 >> i16));
                if (y1 < 0 || y1 >= M2)
                    y1 -= M2;
                y2 = ((x23 & MASK2) << i15) + (MULT2 * (x23 >> i16));
                if (y2 < 0 || y2 >= M2)
                    y2 -= M2;
                y2 += x23;
                if (y2 < 0 || y2 >= M2)
                    y2 -= M2;
                y2 += y1;
                if (y2 < 0 or y2 >= M2)
                    y2 -= M2;

                x23 = x22;
                x22 = x21;
                x21 = y2;

                if (x11 <= x21) {
                    assert((x11 - x21 + M1) <= M1);
                    sample_data[i] = (x11 - x21 + M1) * %(NORM)s;
                }
                else
                {
                    assert(x11 - x21 <= M1);
                    sample_data[i] = (x11 - x21) * %(NORM)s;
                }

                state_data_i[0]= x11;
                state_data_i[1]= x12;
                state_data_i[2]= x13;
                state_data_i[3]= x21;
                state_data_i[4]= x22;
                state_data_i[5]= x23;
            }
        }
        """
            % dict(dtype=dtype, NORM=NORM)
            for dtype, NORM in (
                ("npy_float32", "4.6566126e-10f"),
                ("npy_float64", "4.656612873077392578125e-10"),
            )
        )

    def c_code(self, node, name, inp, out, sub):
        # If we try to use the C code here with something else than a
        # TensorType, something is wrong.
        assert isinstance(node.inputs[0].type, TensorType)
        if self.output_type.dtype == "float16":
            # C code is not tested, fall back to Python
            raise NotImplementedError()
        return """
        //////// <code generated by mrg_uniform>

        npy_int64 odims_i;
        npy_int64 n_elements = 1;
        int n_streams = 0;
        int must_alloc_sample = ((NULL == %(o_sample)s)
                || (PyArray_NDIM(%(o_sample)s) != %(params)s->ndim)
                || !(PyArray_ISCONTIGUOUS(%(o_sample)s)));
        int o_rstate_requirement = %(params)s->inplace ?
                (NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_ALIGNED) :
                (NPY_ARRAY_ENSURECOPY|NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_ALIGNED);

        const npy_int32 i0 = 0;
        const npy_int32 i7 = 7;
        const npy_int32 i9 = 9;
        const npy_int32 i15 = 15;
        const npy_int32 i16 = 16;
        const npy_int32 i22 = 22;
        const npy_int32 i24 = 24;

        const npy_int32 M1 = 2147483647;      //2^31 - 1
        const npy_int32 M2 = 2147462579;      //2^31 - 21069
        const npy_int32 MASK12 = 511;         //2^9 - 1
        const npy_int32 MASK13 = 16777215;    //2^24 - 1
        const npy_int32 MASK2 = 65535;        //2^16 - 1
        const npy_int32 MULT2 = 21069;

        // We have to read size[i] as an int64, but odims has to be intp*
        // for NumPy on 32-bit platforms.
        npy_intp* odims = (npy_intp*)malloc(%(params)s->ndim * sizeof(npy_intp));
        if (odims == NULL) {
            PyErr_NoMemory();
            %(just_fail)s
        }

        if (PyArray_NDIM(%(size)s) != 1)
        {
            PyErr_SetString(PyExc_ValueError, "size must be vector");
            %(fail)s
        }
        if (PyArray_DIMS(%(size)s)[0] != %(params)s->ndim)
        {
            PyErr_Format(PyExc_ValueError, "size must have length %%i (not %%i)",
                %(params)s->ndim, int(PyArray_DIMS(%(size)s)[0]));
            %(fail)s
        }

        for (int i = 0; i < %(params)s->ndim; ++i)
        {
            odims_i = *(dtype_%(size)s *)PyArray_GETPTR1(%(size)s, i);
            odims[i] = odims_i;
            n_elements *= odims_i;
            must_alloc_sample = must_alloc_sample || (PyArray_DIMS(%(o_sample)s)[i] != odims[i]);
            //fprintf(stderr, "size %%i %%i\\n", i, (int)odims[i]);
            //printf("%%li", n_elements);
        }
        //fprintf(stderr, "n_elements %%lld\\n", (long long)n_elements);
        if (n_elements > M1)
        {
            PyErr_SetString(
                PyExc_ValueError,
                "rng_mrg cpu-implementation does not support more than (2**31 -1) samples");
            %(fail)s
        }

        if (must_alloc_sample)
        {
            Py_XDECREF(%(o_sample)s);
            %(o_sample)s = (PyArrayObject*)PyArray_SimpleNew(%(params)s->ndim, odims, %(params)s->otypenum);
            if(!%(o_sample)s) {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc mrg_uniform output");
                %(fail)s
            }
        }
        Py_XDECREF(%(o_rstate)s);
        %(o_rstate)s = (PyArrayObject*)PyArray_FromAny(
            (PyObject*)%(rstate)s,
            NULL, 0, 0, o_rstate_requirement,NULL);

        if (PyArray_NDIM(%(o_rstate)s) != 2)
        {
            PyErr_SetString(PyExc_ValueError, "rstate must be matrix");
            %(fail)s
        }
        if (PyArray_DIMS(%(o_rstate)s)[1] != 6)
        {
            PyErr_Format(PyExc_ValueError, "rstate must have 6 columns");
            %(fail)s
        }
        if (PyArray_DESCR(%(o_rstate)s)->type_num != NPY_INT32)
        {
            PyErr_SetString(PyExc_ValueError, "rstate must be int32");
            %(fail)s
        }
        n_streams = PyArray_DIMS(%(o_rstate)s)[0];

        if (%(params)s->otype_is_float32) {
            cpu_rng_mrg_uniform_npy_float32(%(o_sample)s, %(o_rstate)s, n_elements, n_streams);
        } else {
            cpu_rng_mrg_uniform_npy_float64(%(o_sample)s, %(o_rstate)s, n_elements, n_streams);
        }

        free(odims);
        //////// </ code generated by mrg_uniform>
        """ % dict(
            rstate=inp[0],
            size=inp[1],
            o_rstate=out[0],
            o_sample=out[1],
            params=sub["params"],
            just_fail=sub["fail"],
            fail="""
                 {
                     free(odims);
                     %(fail)s
                 }
                 """
            % dict(fail=sub["fail"]),
        )

    def c_code_cache_version(self):
        return (10,)
def guess_n_streams(size, warn=False):
    """
    Return a guess at a good number of streams.

    Parameters
    ----------
    size : tuple or list of int
        Requested sample shape, used to estimate the element count.
        Anything else (symbolic sizes, non-int entries) triggers the
        fallback of 60 * 256.
    warn : bool, optional
        If True, warn when a guess cannot be made (in which case we
        return 60 * 256).

    Returns
    -------
    int
        The guessed number of streams, capped at 60 * 256.
    """
    # TODO: a smart way of choosing the number of streams, see #612.
    # Note that this code was moved out of `MRG_RandomStream` so that it can
    # be easily accessed from tests, where we want to disable the warning.
    if isinstance(size, (tuple, list)) and all(isinstance(i, int) for i in size):
        # We can make a guess.
        r = 1
        for s in size:
            r *= s
        if r > 6:
            r = r // 6  # chosen as fastest for rbm_benchmark
        # The purpose of sampling from many streams is to be able to use
        # the GPU to its full capacity. It just wastes RAM and
        # stream-initialization time to allocate more streams than necessary
        # for the GPU.
        # XXX: This number is chosen to be good for 280 and 480 architectures,
        #      Better would be to use pycuda to query the number of
        #      processors on the GPU device,
        #      rather than guessing 60.
        return min(r, 60 * 256)
    else:
        if warn:
            warnings.warn(
                (
                    # Fixed typo: "ofstreams" -> "of streams".
                    "MRG_RandomStream can't determine the number of streams "
                    f"from size ({size}), guessing 60*256"
                ),
                DeprecationWarning,
                stacklevel=3,
            )
        return 60 * 256
class MRG_RandomStream:
"""
Module component with similar interface to numpy.random
(numpy.random.RandomState).
Parameters
----------
seed : int or list of 6 int
A default seed to initialize the random state.
If a single int is given, it will be replicated 6 times.
The first 3 values of the seed must all be less than M1 = 2147483647,
and not all 0; and the last 3 values must all be less than
M2 = 2147462579, and not all 0.
"""
def updates(self):
    """Return a shallow copy of the recorded (input_r, output_r) update pairs."""
    return self.state_updates.copy()
def __init__(self, seed=12345):
    """
    Initialize the stream with `seed` (an int, replicated 6 times, or a
    sequence of 6 ints; see set_rstate for the validity constraints).
    """
    # A list of pairs of the form (input_r, output_r), representing the
    # update rules of all the random states generated
    # by this RandomStream.
    self.state_updates = []
    super().__init__()
    # Needed to reset the streams.
    self.default_instance_seed = seed
    self.set_rstate(seed)
def set_rstate(self, seed):
# TODO : need description for method, parameter
if isinstance(seed, (int, np.int32, np.int64)):
if seed == 0:
raise ValueError("seed should not be 0", seed)
elif seed >= M2:
raise ValueError(f"seed should be less than {int(M2)}", seed)
self.rstate = np.asarray([seed] * 6, dtype="int32")
elif len(seed) == 6:
if seed[0] == 0 and seed[1] == 0 and seed[2] == 0:
raise ValueError("The first 3 values of seed should not be all 0", seed)
if seed[3] == 0 and seed[4] == 0 and seed[5] == 0:
raise ValueError("The last 3 values of seed should not be all 0", seed)
if seed[0] >= M1 or seed[1] >= M1 or seed[2] >= M1:
raise ValueError(
f"The first 3 values of seed should be less than {int(M1)}", seed
)
if seed[3] >= M2 or seed[4] >= M2 or seed[5] >= M2:
raise ValueError(
f"The last 3 values of seed should be less than {M2}", seed
)
self.rstate = np.asarray(seed, dtype="int32")
else:
raise TypeError("seed should be 1 integer or 6 integers")
def seed(self, seed=None):
"""
Re-initialize each random stream.
Parameters
----------
seed : None or integer in range 0 to 2**30
Each random stream will be assigned a unique state that depends
deterministically on this value.
Returns
-------
None
"""
if seed is None:
seed = self.default_instance_seed
self.set_rstate(seed)
for old_r, new_r, size, nstreams in self.state_updates:
if nstreams is None:
nstreams = self.n_streams(size)
rstates = self.get_substream_rstates(nstreams, new_r.owner.outputs[1].dtype)
assert (
old_r.get_value(borrow=True, return_internal_type=True).shape
== rstates.shape
)
assert rstates.dtype == old_r.dtype
old_r.set_value(rstates, borrow=True)
def inc_rstate(self):
"""
Update self.rstate to be skipped 2^134 steps forward to the next stream
start.
"""
# self.rstate = ff_2p134(self.rstate)
self.rstate = multMatVect(self.rstate, A1p134, M1, A2p134, M2)
assert self.rstate.dtype == np.int32
@config.change_flags(compute_test_value="off")
def get_substream_rstates(self, n_streams, dtype, inc_rstate=True):
# TODO : need description for parameter and return
"""
Initialize a matrix in which each row is a MRG stream state,
and they are spaced by 2**72 samples.
"""
assert isinstance(dtype, str)
assert n_streams < 2**72
assert n_streams > 0
rval = np.zeros((n_streams, 6), dtype="int32")
rval[0] = self.rstate
# If multMatVect.dot_modulo isn't compiled, compile it.
if multMatVect.dot_modulo is None:
multMatVect(rval[0], A1p72, M1, A2p72, M2)
# This way of calling the PyTensor fct is done to bypass PyTensor overhead.
f = multMatVect.dot_modulo
f.input_storage[0].storage[0] = A1p72
f.input_storage[2].storage[0] = M1
f.input_storage[3].storage[0] = A2p72
f.input_storage[5].storage[0] = M2
for i in range(1, n_streams):
# Inline the following call to bypass Python overhead
# rval[i] = ff_2p72(rval[i - 1])
v = rval[i - 1]
f.input_storage[1].storage[0] = v[:3]
f.input_storage[4].storage[0] = v[3:]
f.vm()
rval[i] = f.output_storage[0].storage[0]
if inc_rstate:
self.inc_rstate()
return rval
def n_streams(self, size):
# TODO : need description for method, parameter and return
return guess_n_streams(size)
def pretty_return(self, node_rstate, new_rstate, sample, size, nstreams):
# TODO : need description for method, parameter and return
sample.rstate = node_rstate
sample.update = (node_rstate, new_rstate)
self.state_updates.append((node_rstate, new_rstate, size, nstreams))
node_rstate.default_update = new_rstate
return sample
def uniform(
self, size, low=0.0, high=1.0, ndim=None, dtype=None, nstreams=None, **kwargs
):
# TODO : need description for parameter 'size', 'ndim', 'nstreams'
"""
Sample a tensor of given size whose element from a uniform
distribution between low and high.
If the size argument is ambiguous on the number of dimensions,
ndim may be a plain integer to supplement the missing information.
Parameters
----------
low
Lower bound of the interval on which values are sampled.
If the ``dtype`` arg is provided, ``low`` will be cast into
dtype. This bound is excluded.
high
Higher bound of the interval on which values are sampled.
If the ``dtype`` arg is provided, ``high`` will be cast into
dtype. This bound is excluded.
size
Can be a list of integer or PyTensor variable (ex: the shape
of other PyTensor Variable).
dtype
The output data type. If dtype is not specified, it will be
inferred from the dtype of low and high, but will be at
least as precise as floatX.
"""
low = as_tensor_variable(low)
high = as_tensor_variable(high)
if dtype is None:
dtype = aes.upcast(config.floatX, low.dtype, high.dtype)
low = cast(low, dtype=dtype)
high = cast(high, dtype=dtype)
low = undefined_grad(low)
high = undefined_grad(high)
if isinstance(size, tuple):
msg = "size must be a tuple of int or an PyTensor variable"
assert all(isinstance(i, (np.integer, int, Variable)) for i in size), msg
if any(isinstance(i, (np.integer, int)) and i <= 0 for i in size):
raise ValueError(
"The specified size contains a dimension with value <= 0", size
)
else:
if not (isinstance(size, Variable) and size.ndim == 1):
raise TypeError(
"size must be a tuple of int or an PyTensor "
"Variable with 1 dimension, got "
+ str(size)
+ " of type "
+ str(type(size))
)
orig_nstreams = nstreams
if nstreams is None:
nstreams = self.n_streams(size)
rstates = self.get_substream_rstates(nstreams, dtype)
d = {}
if "target" in kwargs:
d = dict(target=kwargs.pop("target"))
if len(kwargs) > 0:
raise TypeError(
f"uniform() got unexpected keyword arguments {kwargs.keys()}"
)
node_rstate = shared(rstates, **d)
u = self.pretty_return(
node_rstate,
*mrg_uniform.new(node_rstate, ndim, dtype, size),
size=size,
nstreams=orig_nstreams,
)
r = u * (high - low) + low
if u.type.broadcastable != r.type.broadcastable:
raise NotImplementedError(
"Increase the size to match the broadcasting pattern of "
"`low` and `high` arguments"
)
assert r.dtype == dtype
return r
def binomial(
self, size=None, n=1, p=0.5, ndim=None, dtype="int64", nstreams=None, **kwargs
):
# TODO : need description for method, parameter and return
if n == 1:
p = undefined_grad(as_tensor_variable(p))
x = self.uniform(size=size, nstreams=nstreams, **kwargs)
return cast(x < p, dtype)
else:
raise NotImplementedError("MRG_RandomStream.binomial with n > 1")
def multinomial(
self,
size=None,
n=1,
pvals=None,
ndim=None,
dtype="int64",
nstreams=None,
**kwargs,
):
# TODO : need description for parameter and return
"""
Sample `n` (`n` needs to be >= 1, default 1) times from a multinomial
distribution defined by probabilities pvals.
Example : pvals = [[.98, .01, .01], [.01, .49, .50]] and n=1 will
probably result in [[1,0,0],[0,0,1]]. When setting n=2, this
will probably result in [[2,0,0],[0,1,1]].
Notes
-----
-`size` and `ndim` are only there keep the same signature as other
uniform, binomial, normal, etc.
TODO : adapt multinomial to take that into account
-Does not do any value checking on pvals, i.e. there is no
check that the elements are non-negative, less than 1, or
sum to 1. passing pvals = [[-2., 2.]] will result in
sampling [[0, 0]]
"""
if pvals is None:
raise TypeError("You have to specify pvals")
pvals = as_tensor_variable(pvals)
pvals = undefined_grad(pvals)
if size is not None:
if any(isinstance(i, int) and i <= 0 for i in size):
raise ValueError(
"The specified size contains a dimension with value <= 0", size
)
if size is not None:
raise ValueError(
"Provided a size argument to MRG_RandomStream.multinomial, "
"which does not use the size argument."
)
if ndim is not None:
raise ValueError(
"Provided an ndim argument to MRG_RandomStream.multinomial, "
"which does not use the ndim argument."
)
if pvals.ndim == 2:
size = pvals[:, 0].shape * n
unis = self.uniform(size=size, ndim=1, nstreams=nstreams, **kwargs)
op = multinomial.MultinomialFromUniform(dtype)
n_samples = as_tensor_variable(n)
return op(pvals, unis, n_samples)
else:
raise NotImplementedError(
"MRG_RandomStream.multinomial only implemented for pvals.ndim = 2"
)
def choice(
self,
size=1,
a=None,
replace=True,
p=None,
ndim=None,
dtype="int64",
nstreams=None,
**kwargs,
):
"""
Sample `size` times from a multinomial distribution defined by
probabilities `p`, and returns the indices of the sampled elements.
Sampled values are between 0 and `p.shape[1]-1`.
Only sampling without replacement is implemented for now.
Parameters
----------
size: integer or integer tensor (default 1)
The number of samples. It should be between 1 and `p.shape[1]-1`.
a: int or None (default None)
For now, a should be None. This function will sample
values between 0 and `p.shape[1]-1`. When a != None will be
implemented, if `a` is a scalar, the samples are drawn from the
range 0,...,a-1. We default to 2 as to have the same interface as
RandomStream.
replace: bool (default True)
Whether the sample is with or without replacement.
Only replace=False is implemented for now.
p: 2d numpy array or pytensor tensor
the probabilities of the distribution, corresponding to values
0 to `p.shape[1]-1`.
Example : p = [[.98, .01, .01], [.01, .49, .50]] and size=1 will
probably result in [[0],[2]]. When setting size=2, this
will probably result in [[0,1],[2,1]].
Notes
-----
-`ndim` is only there keep the same signature as other
uniform, binomial, normal, etc.
-Does not do any value checking on pvals, i.e. there is no
check that the elements are non-negative, less than 1, or
sum to 1. passing pvals = [[-2., 2.]] will result in
sampling [[0, 0]]
-Only replace=False is implemented for now.
"""
if replace:
raise NotImplementedError(
"MRG_RandomStream.choice only works without replacement for now."
)
if a is not None:
raise TypeError(
"For now, a has to be None in "
"MRG_RandomStream.choice. Sampled values are "
"between 0 and p.shape[1]-1"
)
if p is None:
raise TypeError(
"For now, p has to be specified in MRG_RandomStream.choice."
)
p = as_tensor_variable(p)
p = undefined_grad(p)
if ndim is not None:
raise ValueError("ndim argument to MRG_RandomStream.choice is not used.")
if p.ndim != 2:
raise NotImplementedError(
"MRG_RandomStream.choice is only implemented for p.ndim = 2"
)
shape = p[:, 0].shape * size
unis = self.uniform(size=shape, ndim=1, nstreams=nstreams, **kwargs)
op = multinomial.ChoiceFromUniform(odtype=dtype)
return op(p, unis, as_tensor_variable(size))
def multinomial_wo_replacement(
self,
size=None,
n=1,
pvals=None,
ndim=None,
dtype="int64",
nstreams=None,
**kwargs,
):
warnings.warn(
"`MRG_RandomStream.multinomial_wo_replacement` is "
"deprecated; use `MRG_RandomStream.choice` instead.",
DeprecationWarning,
stacklevel=2,
)
assert size is None
return self.choice(
size=n,
a=None,
replace=False,
p=pvals,
dtype=dtype,
nstreams=nstreams,
ndim=ndim,
**kwargs,
)
def normal(
self,
size,
avg=0.0,
std=1.0,
ndim=None,
dtype=None,
nstreams=None,
truncate=False,
**kwargs,
):
"""
Sample a tensor of values from a normal distribution.
Parameters
----------
size : int_vector_like
Array dimensions for the output tensor.
avg : float_like, optional
The mean value for the truncated normal to sample from (defaults to 0.0).
std : float_like, optional
The standard deviation for the truncated normal to sample from (defaults to 1.0).
truncate : bool, optional
Truncates the normal distribution at 2 standard deviations if True (defaults to False).
When this flag is set, the standard deviation of the result will be less than the one specified.
ndim : int, optional
The number of dimensions for the output tensor (defaults to None).
This argument is necessary if the size argument is ambiguous on the number of dimensions.
dtype : str, optional
The data-type for the output tensor. If not specified,
the dtype is inferred from avg and std, but it is at least as precise as floatX.
kwargs
Other keyword arguments for random number generation (see uniform).
Returns
-------
samples : TensorVariable
A PyTensor tensor of samples randomly drawn from a normal distribution.
"""
size = _check_size(size)
avg = undefined_grad(as_tensor_variable(avg))
std = undefined_grad(as_tensor_variable(std))
if dtype is None:
dtype = aes.upcast(config.floatX, avg.dtype, std.dtype)
avg = at.cast(avg, dtype=dtype)
std = at.cast(std, dtype=dtype)
# generate even number of uniform samples
# Do manual constant folding to lower optiimizer work.
if isinstance(size, Constant):
n_odd_samples = size.prod(dtype="int64")
else:
n_odd_samples = prod(size, dtype="int64")
n_even_samples = n_odd_samples + n_odd_samples % 2
uniform = self.uniform(
(n_even_samples,),
low=0.0,
high=1.0,
ndim=1,
dtype=dtype,
nstreams=nstreams,
**kwargs,
)
# box-muller transform
u1 = uniform[: n_even_samples // 2]
u2 = uniform[n_even_samples // 2 :]
r = sqrt(-2.0 * log(u1))
theta = np.array(2.0 * np.pi, dtype=dtype) * u2
cos_theta, sin_theta = cos(theta), sin(theta)
z0 = r * cos_theta
z1 = r * sin_theta
if truncate:
# use valid samples
to_fix0 = (z0 < -2.0) | (z0 > 2.0)
to_fix1 = (z1 < -2.0) | (z1 > 2.0)
z0_valid = z0[at.nonzero(~to_fix0)]
z1_valid = z1[at.nonzero(~to_fix1)]
# re-sample invalid samples
to_fix0 = at.nonzero(to_fix0)[0]
to_fix1 = at.nonzero(to_fix1)[0]
n_fix_samples = to_fix0.size + to_fix1.size
lower = at.constant(1.0 / np.e**2, dtype=dtype)
u_fix = self.uniform(
(n_fix_samples,),
low=lower,
high=1.0,
ndim=1,
dtype=dtype,
nstreams=nstreams,
**kwargs,
)
r_fix = sqrt(-2.0 * log(u_fix))
z0_fixed = r_fix[: to_fix0.size] * cos_theta[to_fix0]
z1_fixed = r_fix[to_fix0.size :] * sin_theta[to_fix1]
# pack everything together to a useful result
norm_samples = at.join(0, z0_valid, z0_fixed, z1_valid, z1_fixed)
else:
norm_samples = at.join(0, z0, z1)
if isinstance(n_odd_samples, Variable):
samples = norm_samples[:n_odd_samples]
elif n_odd_samples % 2 == 1:
samples = norm_samples[:-1]
else:
samples = norm_samples
samples = reshape(samples, newshape=size, ndim=ndim)
samples *= std
samples += avg
return samples
def truncated_normal(
self, size, avg=0.0, std=1.0, ndim=None, dtype=None, nstreams=None, **kwargs
):
"""
Sample a tensor of values from a symmetrically truncated normal distribution.
Parameters
----------
size : int_vector_like
Array dimensions for the output tensor.
avg : float_like, optional
The mean value for the truncated normal to sample from (defaults to 0.0).
std : float_like, optional
The standard deviation for the truncated normal to sample from (defaults to 1.0).
ndim : int, optional
The number of dimensions for the output tensor (defaults to None).
This argument is necessary if the size argument is ambiguous on the number of dimensions.
dtype : str, optional
The data-type for the output tensor. If not specified,
the dtype is inferred from avg and std, but it is at least as precise as floatX.
kwargs
Other keyword arguments for random number generation (see uniform).
Returns
-------
samples : TensorVariable
A PyTensor tensor of samples randomly drawn from a truncated normal distribution.
See Also
--------
normal
"""
# constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
std = std / at.constant(0.87962566103423978)
return self.normal(
size=size,
avg=avg,
std=std,
truncate=True,
ndim=ndim,
dtype=dtype,
nstreams=nstreams,
**kwargs,
)
def _check_size(size):
    """
    Normalize `size` into a one-dimensional PyTensor size variable.

    Accepted forms are a 0-d or 1-d PyTensor ``Variable``, a plain integer,
    or a tuple/list mixing scalar PyTensor variables and positive integers.

    Parameters
    ----------
    size : int_vector_like
        Some variable that could serve as the shape for an PyTensor tensor.

    Returns
    -------
    size_var : int_vector
        A one-dimensional PyTensor variable encapsulating the given size.

    Raises
    ------
    ValueError
        If this method can not build a valid size from the input.
    """
    # Scalar-like cases are handled first with early returns.
    if isinstance(size, Variable):
        if size.ndim == 1:
            return size
        if size.ndim == 0:
            return at.stack([size], ndim=1)
        raise ValueError(
            "PyTensor variable must have 1 dimension to be a valid size.", size
        )
    if isinstance(size, (np.integer, int)):
        return at.constant([size], ndim=1)
    if not isinstance(size, (tuple, list)):
        raise ValueError("Size must be a int, tuple, list or PyTensor variable.", size)
    # Validate each entry of the tuple/list before converting it wholesale.
    for entry in size:
        if isinstance(entry, Variable):
            if entry.ndim != 0:
                raise ValueError("Non-scalar PyTensor variable in size", size, entry)
        elif isinstance(entry, (np.integer, int)):
            if entry <= 0:
                raise ValueError(
                    "Non-positive dimensions not allowed in size.", size, entry
                )
        else:
            raise ValueError(
                "Only PyTensor variables and integers are allowed in a size-tuple.",
                size,
                entry,
            )
    return at.as_tensor_variable(size, ndim=1)
@node_rewriter((mrg_uniform_base,))
def mrg_random_make_inplace(fgraph, node):
    """Graph rewrite: replace a non-inplace MRG uniform Op with its inplace variant."""
    current_op = node.op
    if not isinstance(current_op, mrg_uniform_base) or current_op.inplace:
        return False
    # Instantiate through the concrete class so subclasses (e.g. a GPU
    # version) keep their own type.
    inplace_op = current_op.__class__(current_op.output_type, inplace=True)
    return inplace_op.make_node(*node.inputs).outputs
# Register the inplace rewrite late (position=99) so it runs after the main
# optimization passes; tagged so it participates in fast_run/inplace modes.
optdb.register(
    "random_make_inplace_mrg",
    in2out(mrg_random_make_inplace, ignore_newtrees=True),
    "fast_run",
    "inplace",
    position=99,
)
0.7353244530968368
0.6142074400559068
0.11007806099951267
0.6487741703167558
0.36619443260133266
0.2585685825906694
0.9489980279468
0.4309556516818702
0.12257590936496854
0.9760319022461772
0.6940806899219751
0.18046841165050864
0.003993193618953228
0.5351603352464736
0.02472442388534546
0.7705746139399707
0.8138928869739175
0.9650539481081069
0.24507411010563374
0.35767574002966285
0.4939101580530405
0.9027785388752818
0.27498403564095497
0.03848231676965952
0.3081609820947051
0.9062023567967117
0.009030417073518038
0.7953705741092563
0.5061718439683318
0.5975547162815928
0.5435514179989696
0.330895590595901
0.49919482320547104
0.9409166998229921
0.8276205519214272
0.5180770065635443
0.2319392478093505
0.36197659047320485
0.11120751267299056
0.5018561617471278
0.47852187464013696
0.7188052111305296
0.3030327311716974
0.6756376498378813
0.03624899685382843
0.34987151669338346
0.031225718092173338
0.06772322440519929
0.06820952938869596
0.9987128847278655
0.08330700965598226
0.9731874465942383
0.6345655219629407
0.7169904578477144
0.5793502484448254
0.7396790678612888
0.9926023166626692
0.7522463691420853
0.6768838302232325
0.3253784184344113
0.05375300580635667
0.4912636987864971
0.6485021142289042
0.3043024237267673
0.24868384934961796
0.8166692252270877
0.5274319797754288
0.31434731651097536
0.9961257497780025
0.3549888739362359
0.8423425843939185
0.21591948671266437
0.8698299624957144
0.17033040337264538
0.22816143138334155
0.11795765580609441
0.7024209997616708
0.15607220400124788
0.5493582566268742
0.5827712984755635
0.8592293248511851
0.785309090744704
0.6115233600139618
0.019046304281800985
0.2573754615150392
0.03130705002695322
0.6572857238352299
0.2033171127550304
0.5058645992539823
0.15793190989643335
0.6273676953278482
0.7285307059064507
0.265245848800987
0.6073522809892893
0.3896624594926834
0.27189663611352444
0.705508322454989
0.12823439668864012
0.39648046158254147
0.6584051586687565
0.07818163838237524
0.33628708589822054
0.20613654889166355
0.4277639244683087
0.5401185592636466
0.07513022050261497
0.4920963351614773
0.18214095244184136
0.3235122123733163
0.29958881670609117
0.7304665613919497
0.05146520072594285
0.2471711952239275
0.8797005712985992
0.5029069227166474
0.526974250562489
0.15968210343271494
0.4696163134649396
0.17607332626357675
0.362843859475106
0.7626461815088987
0.960180682130158
0.2536660563200712
0.710880630183965
0.28728525526821613
0.78940424695611
0.5242114691063762
0.8314367309212685
0.5898511232808232
0.015212591737508774
0.4944482510909438
0.06396882887929678
0.519745257217437
0.3558214954100549
0.04566589882597327
0.8368005948141217
0.979805170558393
0.7622401369735599
0.2578657674603164
0.5378834479488432
0.9926298237405717
0.4013678622432053
0.510077933780849
0.018817965406924486
0.21481098141521215
0.5357040031813085
0.8512061606161296
0.009026535786688328
0.27302876580506563
0.21162108704447746
0.5273029855452478
0.1086404686793685
0.14079083362594247
0.14331109775230289
0.8190496540628374
0.3947252375073731
0.28109811525791883
0.4066850380040705
0.9154577874578536
0.8929708409123123
0.13500721845775843
0.6328344400972128
0.5668322211131454
0.5448646773584187
0.5418433886952698
0.1141617177054286
0.15885689994320273
0.3867143443785608
0.5574855520389974
0.9173167692497373
0.22908265376463532
0.2047420055605471
0.05979115655645728
0.44121386017650366
0.9507057839073241
0.15352962678298354
0.23290937673300505
0.46427791472524405
8.519855327904224E-4
0.7947354763746262
0.6385304923169315
0.8696001935750246
0.6022149357013404
0.02299323584884405
0.5036068987101316
0.7541037476621568
0.9995524706318974
0.5888469088822603
0.3318097642622888
0.32492663664743304
0.6643895329907537
0.3656829949468374
0.4912424306385219
0.1900841724127531
0.5945985522121191
0.5709856003522873
0.35780346347019076
0.388774358201772
0.9446004652418196
0.14594348100945354
0.6250799335539341
0.5504232128150761
0.16380576323717833
0.7428167965263128
0.5522975320927799
0.655389194842428
0.47579632699489594
0.29743909696117043
0.6319712968543172
0.8178138644434512
0.2785301594994962
0.46813122322782874
0.2898342702537775
0.3287009159103036
0.12909299414604902
0.5859099281951785
0.1891166502609849
0.14497734932228923
0.5543341124430299
0.11846801871433854
0.8499364419840276
0.6603211951442063
0.35630465345457196
0.9680569358170033
0.6639338186942041
0.24408268369734287
0.030771974939852953
0.17226932244375348
0.7909302446059883
0.4327161009423435
0.6732332338578999
0.0849734228104353
0.7278832173906267
0.5536605608649552
0.7091806619428098
0.01754110073670745
0.8406045655719936
0.4815619965083897
0.0535086034797132
0.9874794147908688
0.07097038673236966
0.023544831201434135
0.42413365049287677
0.2970325672067702
0.48028060607612133
0.1990663455799222
0.6099434774369001
0.5050413520075381
0.7814605687744915
0.2650358658283949
0.5148864723742008
0.7807142282836139
0.0976667134091258
0.1516015767119825
0.6566055505536497
0.3946392172947526
0.8052488421089947
0.2964451564475894
0.07394864456728101
0.6961450576782227
0.01576960226520896
0.3434433783404529
0.08799878368154168
0.785557022318244
0.7494717631489038
0.45548726338893175
0.7672475459985435
0.5134695749729872
0.7000438082031906
0.49818582693114877
0.4293400440365076
0.9961911663413048
0.016769078094512224
0.013044610153883696
0.8661804771982133
0.7819683295674622
0.33438047766685486
0.966121535282582
0.7259743176400661
0.9887824659235775
0.9494950002990663
0.037431647535413504
0.8268285538069904
0.7355263698846102
0.3120658891275525
0.3588241692632437
0.471130283549428
0.7047113911248744
0.980073744431138
0.6762627908028662
0.869295812677592
0.9070576094090939
0.7852784115821123
0.16342713963240385
0.06330870278179646
0.6165989111177623
0.342802997212857
0.8414176292717457
0.6921333004720509
0.2594374935142696
0.4386491202749312
0.555369642097503
0.3660965468734503
0.6484139142557979
0.9005299550481141
0.25335891311988235
0.23852926725521684
0.9044205779209733
0.8694673446007073
0.46783560374751687
0.34727911837399006
0.19556640228256583
0.8798208390362561
0.3131108647212386
0.6312824171036482
0.5722001581452787
0.9441223978064954
0.7707183314487338
0.17464511329308152
0.08897313429042697
0.5044040409848094
0.5735817537643015
0.4467783076688647
0.19051036844030023
0.4578995378687978
0.6395204453729093
0.460110604763031
0.576092894654721
0.7038368303328753
0.5555814192630351
0.4171535111963749
0.8905360852368176
0.12811446748673916
0.6814800254069269
0.8502416326664388
0.12028768053278327
0.16715052351355553
0.3563938206061721
0.049810963682830334
0.27328392397612333
0.2407418810762465
0.6631906591355801
0.674483266659081
0.10489491606131196
0.04698043642565608
0.0812066881917417
0.312124056275934
0.6798701109364629
0.7286937129683793
0.9784366562962532
0.5650205011479557
0.833059043623507
0.8976074242964387
0.9441233519464731
0.6146679543890059
0.9019614770077169
0.5529476394876838
0.7665416682139039
0.39598167687654495
0.26307358546182513
0.14862705068662763
0.9521124185994267
0.17644333699718118
0.7684473628178239
0.4274347145110369
0.6102834036573768
0.9328651092946529
0.058630190789699554
0.04729347629472613
0.9597438890486956
0.6761234584264457
0.21832499839365482
0.20707347383722663
0.7274158899672329
0.9477886455133557
0.7821800266392529
0.07305240212008357
0.40399201214313507
0.22684293938800693
0.053185423370450735
0.330069282092154
0.6862794999033213
0.7821815954521298
0.22617859859019518
0.8118352359160781
0.015444065444171429
0.6732339109294116
0.9980663135647774
0.8833195753395557
0.21191661106422544
0.32638366147875786
0.5747208022512496
0.07515769777819514
0.02952938713133335
0.4980746121145785
0.8762881984002888
0.17386484891176224
0.10696181375533342
0.5474299816414714
0.016154434997588396
0.6960771018639207
0.47133891424164176
0.9015861176885664
0.782880718819797
0.6602211343124509
0.6578835439868271
0.6049443730153143
0.17169494135305285
0.9915955001488328
0.10519243823364377
0.37815978936851025
0.20879409136250615
0.45666090911254287
0.6456936108879745
0.684759714640677
0.8762755445204675
0.8020628895610571
0.1663151141256094
0.31246642768383026
0.18852565623819828
......@@ -6,7 +6,6 @@ import numpy as np
import pytensor
from pytensor.misc.pkl_utils import StripPickler, dump, load
from pytensor.sandbox.rng_mrg import MRG_RandomStream
from pytensor.tensor.type import matrix
......@@ -23,17 +22,6 @@ class TestDumpLoad:
if self.tmpdir is not None:
shutil.rmtree(self.tmpdir)
def test_dump_load_mrg(self):
rng = MRG_RandomStream()
with open("test", "wb") as f:
dump(rng, f)
with open("test", "rb") as f:
rng = load(f)
assert type(rng) == MRG_RandomStream
def test_dump_zip_names(self):
foo_1 = pytensor.shared(0, name="foo")
foo_2 = pytensor.shared(1, name="foo")
......
import numpy as np
import tests.unittest_tools as utt
from pytensor import function
from pytensor.configdefaults import config
from pytensor.sandbox.multinomial import MultinomialFromUniform
from pytensor.tensor.type import dmatrix, dvector, fmatrix, fvector, iscalar
def test_n_samples_1():
    """With certain (one-hot) pvals, all n draws land on the certain outcome."""
    pvals_in = fmatrix()
    uniform_in = fvector()
    n_in = iscalar()
    sampled = MultinomialFromUniform("auto")(pvals_in, uniform_in, n_in)
    sample_fn = function(
        [pvals_in, uniform_in, n_in], sampled, allow_input_downcast=True
    )
    rng = np.random.default_rng(12345)
    for n_samples in (1, 5, 10, 100, 1000, 10000):
        uni = rng.random(2 * n_samples).astype(config.floatX)
        res = sample_fn([[1.0, 0.0], [0.0, 1.0]], uni, n_samples)
        utt.assert_allclose(res, [[n_samples * 1.0, 0.0], [0.0, n_samples * 1.0]])
def test_n_samples_2():
    """The counts of a multinomial draw always sum to the requested n."""
    pvals_in = fmatrix()
    uniform_in = fvector()
    n_in = iscalar()
    sampled = MultinomialFromUniform("auto")(pvals_in, uniform_in, n_in)
    sample_fn = function(
        [pvals_in, uniform_in, n_in], sampled, allow_input_downcast=True
    )
    rng = np.random.default_rng(12345)
    # The two category counts reproduce the two original loops; the nested
    # order keeps the RNG draw sequence identical.
    for n_categories in (1000, 1000000):
        for n_samples in (1, 5, 10, 100, 1000):
            uni = rng.random(n_samples).astype(config.floatX)
            pvals = rng.integers(1, n_categories, (1, n_categories)).astype(
                config.floatX
            )
            pvals /= pvals.sum(1)
            res = sample_fn(pvals, uni, n_samples)
            assert res.sum() == n_samples
def test_multinomial_0():
    """Exercise the MultinomialFromUniform Op directly on hand-picked inputs.

    This does not go through the multinomial() call in GPU random generation.
    """
    pvals_in = fmatrix()
    uniform_in = fvector()
    sampled = MultinomialFromUniform("auto")(pvals_in, uniform_in)
    # Multiplying by 2 allows the multinomial to reuse its output.
    sample_fn = function([pvals_in, uniform_in], sampled * 2, allow_input_downcast=True)
    # Both first and second samples can be drawn.
    utt.assert_allclose(sample_fn([[1, 0], [0, 1]], [0.1, 0.1]), [[2, 0], [0, 2]])
    # Both rows can draw the second label.
    utt.assert_allclose(
        sample_fn([[0.2, 0.8], [0.3, 0.7]], [0.31, 0.31]), [[0, 2], [0, 2]]
    )
    # Both first labels can be drawn.
    utt.assert_allclose(
        sample_fn([[0.2, 0.8], [0.3, 0.7]], [0.21, 0.21]), [[0, 2], [2, 0]]
    )
    # Change the size to make sure output gets reallocated ok, and that the
    # GPU version doesn't screw up the transposed-ness.
    utt.assert_allclose(sample_fn([[0.2, 0.8]], [0.25]), [[0, 2]])
    # TODO: check a bigger example (make sure blocking on GPU is handled correctly)
def test_multinomial_large():
    """Many-row sampling: check output shape, dtype policy, and per-row totals."""
    pvals_in = fmatrix()
    uniform_in = fvector()
    sampled = MultinomialFromUniform("auto")(pvals_in, uniform_in)
    sample_fn = function([pvals_in, uniform_in], sampled * 2, allow_input_downcast=True)
    probs = np.arange(10000 * 4, dtype="float32").reshape((10000, 4)) + 0.1
    probs = probs / probs.sum(axis=1)[:, None]
    uniforms = np.ones_like(probs[:, 0]) * 0.5
    drawn = sample_fn(probs, uniforms)
    assert drawn.shape == probs.shape
    # The expected output dtype depends on the configured cast policy.
    expected_dtype = {
        "custom": probs.dtype,
        "numpy+floatX": config.floatX,
        "numpy": "float64",
    }
    if config.cast_policy not in expected_dtype:
        raise NotImplementedError(config.cast_policy)
    assert drawn.dtype == expected_dtype[config.cast_policy]
    utt.assert_allclose(drawn.sum(axis=1), 2)
    expected_rows = np.asarray([0, 0, 2, 0]) + 0 * probs
    utt.assert_allclose(drawn, expected_rows)  # broadcast over all rows
def test_multinomial_dtypes():
    """Output dtype follows the inputs for "auto" and the explicit odtype otherwise."""
    drawn = MultinomialFromUniform("auto")(dmatrix(), dvector())
    assert drawn.dtype == "float64", drawn.dtype
    drawn = MultinomialFromUniform("auto")(fmatrix(), fvector())
    assert drawn.dtype == "float32", drawn.dtype
    # An explicit odtype overrides the float32 inputs.
    drawn = MultinomialFromUniform("float64")(fmatrix(), fvector())
    assert drawn.dtype == "float64", drawn.dtype
import numpy as np
import pytest
from pytensor import function
from pytensor.configdefaults import config
from pytensor.sandbox import multinomial
from pytensor.sandbox.rng_mrg import MRG_RandomStream as RandomStream
from pytensor.tensor.type import fmatrix, fvector, iscalar
class TestOP:
    """Direct tests of the `ChoiceFromUniform` Op (sampling without replacement)."""

    @pytest.mark.xfail(
        reason="This test is designed around very specific random draws from the old NumPy API"
    )
    def test_select_distinct(self):
        # Tests that ChoiceFromUniform always selects distinct elements
        p = fmatrix()
        u = fvector()
        n = iscalar()
        m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)
        f = function([p, u, n], m, allow_input_downcast=True)
        n_elements = 1000
        all_indices = range(n_elements)
        rng = np.random.default_rng(12345)
        # Hard-coded draws recorded from the legacy NumPy RandomState API,
        # hence the xfail marker above.
        expected = [
            np.asarray([[931, 318, 185, 209, 559]]),
            np.asarray([[477, 887, 2, 717, 333, 665, 159, 559, 348, 136]]),
            np.asarray(
                [
                    [
                        546, 28, 79, 665, 295, 779, 433, 531, 411, 716,
                        244, 234, 70, 88, 612, 639, 383, 335, 451, 100,
                        175, 492, 848, 771, 559, 214, 568, 596, 370, 486,
                        855, 925, 138, 300, 528, 507, 730, 199, 882, 357,
                        58, 195, 705, 900, 66, 468, 513, 410, 816, 672,
                    ]
                ]
            ),
        ]
        for i in [5, 10, 50, 100, 500, n_elements]:
            uni = rng.random(i).astype(config.floatX)
            pvals = rng.integers(1, 100, (1, n_elements)).astype(config.floatX)
            pvals /= pvals.sum(1)
            res = f(pvals, uni, i)
            # Only compare against the recorded fixture with a matching shape.
            for ii in range(len(expected)):
                if expected[ii].shape == res.shape:
                    assert (expected[ii] == res).all()
            res = np.squeeze(res)
            assert len(res) == i
            assert np.all(np.in1d(np.unique(res), all_indices)), res

    def test_fail_select_alot(self):
        # Tests that ChoiceFromUniform fails when asked to sample more
        # elements than the actual number of elements
        p = fmatrix()
        u = fvector()
        n = iscalar()
        m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)
        f = function([p, u, n], m, allow_input_downcast=True)
        n_elements = 100
        n_selected = 200
        rng = np.random.default_rng(12345)
        uni = rng.random(n_selected).astype(config.floatX)
        pvals = rng.integers(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        with pytest.raises(ValueError):
            f(pvals, uni, n_selected)

    def test_select_proportional_to_weight(self):
        # Tests that ChoiceFromUniform selects elements, on average,
        # proportional to the their probabilities
        p = fmatrix()
        u = fvector()
        n = iscalar()
        m = multinomial.ChoiceFromUniform(odtype="auto")(p, u, n)
        f = function([p, u, n], m, allow_input_downcast=True)
        n_elements = 100
        n_selected = 10
        mean_rtol = 0.0005
        rng = np.random.default_rng(12345)
        pvals = rng.integers(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        avg_pvals = np.zeros((n_elements,), dtype=config.floatX)
        # Accumulate empirical selection frequencies over many repetitions.
        for rep in range(10000):
            uni = rng.random(n_selected).astype(config.floatX)
            res = f(pvals, uni, n_selected)
            res = np.squeeze(res)
            avg_pvals[res] += 1
        avg_pvals /= avg_pvals.sum()
        avg_diff = np.mean(abs(avg_pvals - pvals))
        assert avg_diff < mean_rtol, avg_diff
class TestFunction:
    """Sampling without replacement through the `MRG_RandomStream` front end."""

    def test_select_distinct(self):
        """`multinomial_wo_replacement` returns n distinct, in-range indices."""
        stream = RandomStream(12345)
        pvals_in = fmatrix()
        n_in = iscalar()
        with pytest.deprecated_call():
            draw = stream.multinomial_wo_replacement(pvals=pvals_in, n=n_in)
        sample_fn = function([pvals_in, n_in], draw, allow_input_downcast=True)
        n_elements = 1000
        valid_indices = range(n_elements)
        rng = np.random.default_rng(12345)
        for n_selected in (5, 10, 50, 100, 500, n_elements):
            pvals = rng.integers(1, 100, (1, n_elements)).astype(config.floatX)
            pvals /= pvals.sum(1)
            picked = np.squeeze(sample_fn(pvals, n_selected))
            assert len(picked) == n_selected
            assert np.all(np.in1d(np.unique(picked), valid_indices)), picked

    def test_fail_select_alot(self):
        """Requesting more samples than there are elements raises ValueError."""
        stream = RandomStream(12345)
        pvals_in = fmatrix()
        n_in = iscalar()
        with pytest.deprecated_call():
            draw = stream.multinomial_wo_replacement(pvals=pvals_in, n=n_in)
        sample_fn = function([pvals_in, n_in], draw, allow_input_downcast=True)
        n_elements = 100
        n_selected = 200
        rng = np.random.default_rng(12345)
        pvals = rng.integers(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        with pytest.raises(ValueError):
            sample_fn(pvals, n_selected)

    def test_select_proportional_to_weight(self):
        """Empirical selection frequencies approach the supplied pvals."""
        stream = RandomStream(12345)
        pvals_in = fmatrix()
        n_in = iscalar()
        draw = stream.choice(size=n_in, p=pvals_in, replace=False)
        sample_fn = function([pvals_in, n_in], draw, allow_input_downcast=True)
        n_elements = 100
        n_selected = 10
        mean_rtol = 0.0005
        rng = np.random.default_rng(12345)
        pvals = rng.integers(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        hit_freq = np.zeros((n_elements,), dtype=config.floatX)
        for _ in range(10000):
            picked = np.squeeze(sample_fn(pvals, n_selected))
            hit_freq[picked] += 1
        hit_freq /= hit_freq.sum()
        avg_diff = np.mean(abs(hit_freq - pvals))
        assert avg_diff < mean_rtol
import contextlib
import os
import sys
import time
import numpy as np
import pytest
import pytensor
from pytensor.compile.function import function
from pytensor.compile.sharedvalue import shared
from pytensor.configdefaults import config
from pytensor.gradient import NullTypeGradError, UndefinedGrad, grad, zero_grad
from pytensor.sandbox import rng_mrg
from pytensor.sandbox.rng_mrg import MRG_RandomStream, mrg_uniform
from pytensor.scan.basic import scan
from pytensor.tensor.basic import as_tensor_variable, cast
from pytensor.tensor.math import sum as at_sum
from pytensor.tensor.random.utils import RandomStream
from pytensor.tensor.type import iscalar, ivector, lmatrix, matrix, scalar, vector
from tests import unittest_tools as utt
# TODO: test MRG_RandomStream
# Partly done in test_consistency_randomstreams
# TODO: test optimizer mrg_random_make_inplace
# Reference samples generated by Java code using L'Ecuyer et al.'s
# MRG31k3p implementation, with:
# main seed: [12345]*6 (default)
# 12 streams
# 7 substreams for each stream
# 5 samples drawn from each substream
# The reference file ships with pytensor under sandbox/.
java_samples = np.loadtxt(
    os.path.join(
        os.path.split(pytensor.__file__)[0], "sandbox", "samples_MRG31k3p_12_7_5.txt"
    )
)
def test_deterministic():
    """Two streams built from the same seed must reproduce the exact
    same sequence of uniform draws, while consecutive draws differ."""
    seed = utt.fetch_seed()
    shape = (10, 20)

    def two_draws(seed_value):
        # Build a fresh stream and take two consecutive samples from it.
        stream = MRG_RandomStream(seed=seed_value)
        sampler = function([], stream.uniform(size=shape))
        return sampler(), sampler()

    first_a, first_b = two_draws(seed)
    # Consecutive draws from a single stream should not repeat.
    assert not np.allclose(first_a, first_b)
    second_a, second_b = two_draws(seed)
    # Re-seeding with the same value reproduces the same sequence.
    assert np.allclose(first_a, second_a)
    assert np.allclose(first_b, second_b)
def test_consistency_randomstreams():
    """Verify that the random numbers generated by MRG_RandomStream are
    the same as the reference (Java) implementation by L'Ecuyer et al.
    (loaded into the module-level ``java_samples``)."""
    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7
    samples = []
    rng = MRG_RandomStream(seed=seed)
    for i in range(n_streams):
        stream_samples = []
        u = rng.uniform(size=(n_substreams,), nstreams=n_substreams)
        f = function([], u)
        for j in range(n_samples):
            s = f()
            stream_samples.append(s)
        stream_samples = np.array(stream_samples)
        # Transpose so values are grouped per substream, matching the
        # ordering of the reference file.
        stream_samples = stream_samples.T.flatten()
        samples.append(stream_samples)
    samples = np.array(samples).flatten()
    assert np.allclose(samples, java_samples)
def test_get_substream_rstates():
    """Smoke-test ``get_substream_rstates`` with test values enabled."""
    with config.change_flags(compute_test_value="raise"):
        seed = np.random.default_rng(utt.fetch_seed()).integers(2147462579)
        stream = MRG_RandomStream(seed)
        # Only check that requesting 100 float32 substream states succeeds.
        stream.get_substream_rstates(100, "float32")
def test_consistency_cpu_serial():
    """Verify that the random numbers generated by mrg_uniform, serially
    (one substream at a time), match the reference (Java) implementation
    by L'Ecuyer et al."""
    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7
    samples = []
    curr_rstate = np.array([seed] * 6, dtype="int32")
    for i in range(n_streams):
        stream_rstate = curr_rstate.copy()
        for j in range(n_substreams):
            rstate = shared(np.array([stream_rstate.copy()], dtype="int32"))
            new_rstate, sample = rng_mrg.mrg_uniform.new(
                rstate, ndim=None, dtype=config.floatX, size=(1,)
            )
            # Not really necessary, just mimicking
            # rng_mrg.MRG_RandomStream's behavior
            sample.rstate = rstate
            sample.update = (rstate, new_rstate)
            rstate.default_update = new_rstate
            f = function([], sample)
            for k in range(n_samples):
                s = f()
                samples.append(s)
            # next substream: advance the state by 2**72 draws
            stream_rstate = rng_mrg.ff_2p72(stream_rstate)
        # next stream: advance the state by 2**134 draws
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)
    samples = np.array(samples).flatten()
    assert np.allclose(samples, java_samples)
def test_consistency_cpu_parallel():
    """Verify that the random numbers generated by mrg_uniform, in
    parallel (all substreams in one op), match the reference (Java)
    implementation by L'Ecuyer et al."""
    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7  # 7 samples will be drawn in parallel
    samples = []
    curr_rstate = np.array([seed] * 6, dtype="int32")
    for i in range(n_streams):
        stream_samples = []
        rstate = [curr_rstate.copy()]
        for j in range(1, n_substreams):
            # Advance by 2**72 draws to obtain each successive substream state.
            rstate.append(rng_mrg.ff_2p72(rstate[-1]))
        rstate = np.asarray(rstate)
        rstate = shared(rstate)
        new_rstate, sample = rng_mrg.mrg_uniform.new(
            rstate, ndim=None, dtype=config.floatX, size=(n_substreams,)
        )
        # Not really necessary, just mimicking
        # rng_mrg.MRG_RandomStream's behavior
        sample.rstate = rstate
        sample.update = (rstate, new_rstate)
        rstate.default_update = new_rstate
        f = function([], sample)
        for k in range(n_samples):
            s = f()
            stream_samples.append(s)
        samples.append(np.array(stream_samples).T.flatten())
        # next stream: advance the state by 2**134 draws
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)
    samples = np.array(samples).flatten()
    assert np.allclose(samples, java_samples)
def check_basics(
    f,
    steps,
    sample_size,
    prefix="",
    allow_01=False,
    inputs=None,
    target_avg=0.5,
    target_std=None,
    mean_rtol=0.01,
    std_tol=0.01,
):
    """Draw ``steps`` batches from ``f`` and validate basic statistics.

    Parameters
    ----------
    f : callable
        Sampling function, called as ``f(*inputs)``; must return an
        array of shape ``sample_size``.
    steps : int
        Number of batches to draw (assumed >= 1).
    sample_size : tuple
        Expected shape of every returned batch.
    prefix : str
        Label only used by the (disabled) diagnostic prints.
    allow_01 : bool
        When False, every sample must lie strictly inside (0, 1).
    inputs : list or None
        Positional arguments forwarded to ``f``.
    target_avg : float or ndarray
        Expected mean of the samples.
    target_std : float or None
        Expected standard deviation around ``target_avg``; not checked
        when None.
    mean_rtol, std_tol : float
        Relative tolerances for the mean and std checks.
    """
    if inputs is None:
        inputs = []
    elapsed = 0.0
    running_var = 0.0
    for step in range(steps):
        started = time.perf_counter()
        batch = f(*inputs)
        assert batch.shape == sample_size
        elapsed += time.perf_counter() - started
        batch = np.asarray(batch)
        if step == 0:
            running_mean = np.array(batch, copy=True)
            running_var = np.mean((batch - target_avg) ** 2)
            lo = batch.min()
            hi = batch.max()
        else:
            # Incremental running average over the batches seen so far.
            weight = 1.0 / (1 + step)
            running_mean = weight * batch + (1 - weight) * running_mean
            running_var = (
                weight * np.mean((batch - target_avg) ** 2)
                + (1 - weight) * running_var
            )
            lo = min(lo, batch.min())
            hi = max(hi, batch.max())
    if not allow_01:
        # Samples must stay strictly inside the open interval (0, 1).
        assert lo > 0
        assert hi < 1
    if hasattr(running_mean, "shape") and hasattr(target_avg, "shape"):
        # Array-valued target mean: compare element-wise averages.
        gap = np.mean(abs(running_mean - target_avg))
        assert np.all(
            gap < mean_rtol * (1 + abs(target_avg))
        ), f"bad mean? {running_mean} {target_avg}"
    else:
        # Scalar target: averaging the running mean adds extra precision.
        overall = np.mean(running_mean)
        assert abs(overall - target_avg) < mean_rtol * (
            1 + abs(target_avg)
        ), f"bad mean? {overall:f} {target_avg:f}"
    spread = np.sqrt(running_var)
    if target_std is not None:
        assert abs(spread - target_std) < std_tol * (
            1 + abs(target_std)
        ), f"bad std? {spread:f} {target_std:f} {std_tol:f}"
@pytest.mark.slow
def test_uniform():
    """Statistical checks of MRG uniform sampling for constant sizes,
    symbolic (``x.shape``) sizes, and the empty (scalar) size, compared
    side-by-side with the NumPy-backed ``RandomStream``."""
    # TODO: test param low, high
    # TODO: test size=None
    # TODO: test ndim!=size.ndim
    # TODO: test bad seed
    # TODO: test size=Var, with shape that change from call to call
    if (
        config.mode in ("DEBUG_MODE", "DebugMode", "FAST_COMPILE")
        or config.mode == "Mode"
        and config.linker in ["py"]
    ):
        sample_size = (10, 100)
        steps = 50
    else:
        sample_size = (500, 50)
        steps = int(1e3)
    x = matrix()
    for size, const_size, var_input, input in [
        (sample_size, sample_size, [], []),
        (x.shape, sample_size, [x], [np.zeros(sample_size, dtype=config.floatX)]),
        (
            (x.shape[0], sample_size[1]),
            sample_size,
            [x],
            [np.zeros(sample_size, dtype=config.floatX)],
        ),
        # test empty size (scalar)
        ((), (), [], []),
    ]:
        # TEST CPU IMPLEMENTATION
        # The python and C implementation are tested with DebugMode
        x = matrix()
        R = MRG_RandomStream(234)
        # Note: we specify `nstreams` to avoid a warning.
        # TODO Look for all occurrences of `guess_n_streams` and `30 * 256`
        # for such situations: it would be better to instead filter the
        # warning using the warning module.
        u = R.uniform(size=size, nstreams=rng_mrg.guess_n_streams(size, warn=False))
        f = function(var_input, u)
        assert any(
            isinstance(node.op, mrg_uniform) for node in f.maker.fgraph.toposort()
        )
        f(*input)
        # Increase the number of steps if sizes implies only a few samples
        if np.prod(const_size) < 10:
            steps_ = steps * 100
        else:
            steps_ = steps
        check_basics(f, steps_, const_size, prefix="mrg cpu", inputs=input)
        RR = RandomStream(234)
        uu = RR.uniform(size=size)
        ff = function(var_input, uu)
        # It's not our problem if numpy generates 0 or 1
        check_basics(
            ff, steps_, const_size, prefix="numpy", allow_01=True, inputs=input
        )
def test_broadcastable():
    """Every distribution must produce outputs whose broadcast pattern
    is (False, True) for a size/pvals with a broadcastable last dim,
    whether the dimensions are constants or symbolic variables."""
    R = MRG_RandomStream(234)
    x = matrix()
    size1 = (10, 1)
    size2 = (x.shape[0], 1)
    pvals_1 = np.random.uniform(0, 1, size=size1)
    pvals_1 = pvals_1 / sum(pvals_1)
    pvals_2 = R.uniform(size=size2)
    pvals_2 = pvals_2 / at_sum(pvals_2)
    for distribution in [
        R.uniform,
        R.normal,
        R.truncated_normal,
        R.binomial,
        R.multinomial,
        R.multinomial_wo_replacement,
    ]:
        # multinomial or multinomial_wo_replacement does not support the
        # "size" argument; their sizes are implicitly defined by "pvals".
        if distribution in [R.multinomial, R.multinomial_wo_replacement]:
            # multinomial_wo_replacement is deprecated, so its calls must
            # be wrapped in a deprecation-warning guard.
            context_mgr = (
                pytest.deprecated_call()
                if distribution == R.multinomial_wo_replacement
                else contextlib.suppress()
            )
            # check when all dimensions are constant
            with context_mgr:
                uu = distribution(pvals=pvals_1)
            assert uu.broadcastable == (False, True)
            # check when some dimensions are pytensor variables
            with context_mgr:
                uu = distribution(pvals=pvals_2)
            assert uu.broadcastable == (False, True)
        else:
            # check when all dimensions are constant
            uu = distribution(size=size1)
            assert uu.broadcastable == (False, True)
            # check when some dimensions are pytensor variables
            uu = distribution(size=size2)
            assert uu.broadcastable == (False, True)
def check_binomial(mean, size, const_size, var_input, input, steps, rtol):
    """Compare MRG and NumPy-backed Bernoulli(``mean``) samplers.

    Builds a binomial sampler with both backends and runs
    ``check_basics`` to validate the empirical average of the draws.
    """
    # Shapes with few samples need many more repetitions for stable stats.
    n_steps = steps * 100 if np.prod(const_size) < 10 else steps

    mrg_stream = MRG_RandomStream(234)
    mrg_fn = function(var_input, mrg_stream.binomial(size=size, p=mean))
    mrg_fn(*input)  # warm-up call; catches immediate runtime errors
    check_basics(
        mrg_fn,
        n_steps,
        const_size,
        prefix="mrg cpu",
        inputs=input,
        allow_01=True,
        target_avg=mean,
        mean_rtol=rtol,
    )

    np_stream = RandomStream(234)
    np_fn = function(var_input, np_stream.binomial(1, mean, size=size))
    # It's not our problem if numpy generates 0 or 1
    check_basics(
        np_fn,
        n_steps,
        const_size,
        prefix="numpy",
        allow_01=True,
        inputs=input,
        target_avg=mean,
        mean_rtol=rtol,
    )
@pytest.mark.slow
def test_binomial():
    """Run ``check_binomial`` for several means, constant and symbolic
    sizes, and the empty (scalar) size."""
    # TODO: test size=None, ndim=X
    # TODO: test size=X, ndim!=X.ndim
    # TODO: test random seed in legal value(!=0 and other)
    # TODO: test sample_size not a multiple of guessed #streams
    # TODO: test size=Var, with shape that change from call to call
    # we test size in a tuple of int and a tensor.shape.
    # we test the param p with int.
    if (
        config.mode in ("DEBUG_MODE", "DebugMode", "FAST_COMPILE")
        or config.mode == "Mode"
        and config.linker in ["py"]
    ):
        sample_size = (10, 50)
        steps = 50
        rtol = 0.02
    else:
        sample_size = (500, 50)
        steps = int(1e3)
        rtol = 0.01
    x = matrix()
    for mean in [0.1, 0.5]:
        for size, const_size, var_input, input in [
            (sample_size, sample_size, [], []),
            (x.shape, sample_size, [x], [np.zeros(sample_size, dtype=config.floatX)]),
            # test empty size (scalar)
            ((), (), [], []),
        ]:
            check_binomial(mean, size, const_size, var_input, input, steps, rtol)
@pytest.mark.slow
def test_normal0():
    """Statistical checks for MRG normal sampling over several shapes,
    constant and symbolic sizes, and scalar as well as array means,
    compared side-by-side with the NumPy-backed ``RandomStream``."""
    steps = 50
    std = 2.0
    if (
        config.mode in ("DEBUG_MODE", "DebugMode", "FAST_COMPILE")
        or config.mode == "Mode"
        and config.linker in ["py"]
    ):
        sample_size = (25, 30)
        default_rtol = 0.02
    else:
        sample_size = (999, 50)
        default_rtol = 0.01
    sample_size_odd = (sample_size[0], sample_size[1] - 1)
    x = matrix()
    # Each case: (size, const_size, var_input, input, avg, rtol, std_tol)
    test_cases = [
        (sample_size, sample_size, [], [], -5.0, default_rtol, default_rtol),
        (
            x.shape,
            sample_size,
            [x],
            [np.zeros(sample_size, dtype=config.floatX)],
            -5.0,
            default_rtol,
            default_rtol,
        ),
        # test odd value
        (
            x.shape,
            sample_size_odd,
            [x],
            [np.zeros(sample_size_odd, dtype=config.floatX)],
            -5.0,
            default_rtol,
            default_rtol,
        ),
        (
            sample_size,
            sample_size,
            [],
            [],
            np.arange(np.prod(sample_size), dtype="float32").reshape(sample_size),
            10.0 * std / np.sqrt(steps),
            default_rtol,
        ),
        # test empty size (scalar)
        ((), (), [], [], -5.0, default_rtol, 0.02),
        # test with few samples at the same time
        ((1,), (1,), [], [], -5.0, default_rtol, 0.02),
        ((3,), (3,), [], [], -5.0, default_rtol, 0.02),
    ]
    for size, const_size, var_input, input, avg, rtol, std_tol in test_cases:
        R = MRG_RandomStream(234)
        # Note: we specify `nstreams` to avoid a warning.
        n = R.normal(
            size=size,
            avg=avg,
            std=std,
            nstreams=rng_mrg.guess_n_streams(size, warn=False),
        )
        f = function(var_input, n)
        f(*input)
        # Increase the number of steps if size implies only a few samples
        if np.prod(const_size) < 10:
            steps_ = steps * 50
        else:
            steps_ = steps
        check_basics(
            f,
            steps_,
            const_size,
            target_avg=avg,
            target_std=std,
            prefix="mrg ",
            allow_01=True,
            inputs=input,
            mean_rtol=rtol,
            std_tol=std_tol,
        )
        sys.stdout.flush()
        RR = RandomStream(235)
        nn = RR.normal(avg, std, size=size)
        ff = function(var_input, nn)
        check_basics(
            ff,
            steps_,
            const_size,
            target_avg=avg,
            target_std=std,
            prefix="numpy ",
            allow_01=True,
            inputs=input,
            mean_rtol=rtol,
        )
@pytest.mark.slow
def test_normal_truncation():
    """Like ``test_normal0`` but with ``truncate=True``: additionally
    asserts every sample lies within avg +/- 2*std."""
    # just a copy of test_normal0 with extra bound check
    steps = 50
    std = 2.0
    # standard deviation is slightly less than for a regular Gaussian
    # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
    target_std = 0.87962566103423978 * std
    if (
        config.mode in ("DEBUG_MODE", "DebugMode", "FAST_COMPILE")
        or config.mode == "Mode"
        and config.linker in ["py"]
    ):
        sample_size = (25, 30)
        default_rtol = 0.02
    else:
        sample_size = (999, 50)
        default_rtol = 0.01
    sample_size_odd = (sample_size[0], sample_size[1] - 1)
    x = matrix()
    # Each case: (size, const_size, var_input, input, avg, rtol, std_tol)
    test_cases = [
        (sample_size, sample_size, [], [], -5.0, default_rtol, default_rtol),
        (
            x.shape,
            sample_size,
            [x],
            [np.zeros(sample_size, dtype=config.floatX)],
            -5.0,
            default_rtol,
            default_rtol,
        ),
        # test odd value
        (
            x.shape,
            sample_size_odd,
            [x],
            [np.zeros(sample_size_odd, dtype=config.floatX)],
            -5.0,
            default_rtol,
            default_rtol,
        ),
        (
            sample_size,
            sample_size,
            [],
            [],
            np.arange(np.prod(sample_size), dtype="float32").reshape(sample_size),
            10.0 * std / np.sqrt(steps),
            default_rtol,
        ),
        # test empty size (scalar)
        ((), (), [], [], -5.0, default_rtol, 0.02),
        # test with few samples at the same time
        ((1,), (1,), [], [], -5.0, default_rtol, 0.02),
        ((3,), (3,), [], [], -5.0, default_rtol, 0.02),
    ]
    for size, const_size, var_input, input, avg, rtol, std_tol in test_cases:
        R = MRG_RandomStream(234)
        # Note: we specify `nstreams` to avoid a warning.
        n = R.normal(
            size=size,
            avg=avg,
            std=std,
            truncate=True,
            nstreams=rng_mrg.guess_n_streams(size, warn=False),
        )
        f = function(var_input, n)
        # check if truncated at 2*std
        samples = f(*input)
        assert np.all(avg + 2 * std - samples >= 0), "bad upper bound? {} {}".format(
            samples,
            avg + 2 * std,
        )
        assert np.all(samples - (avg - 2 * std) >= 0), "bad lower bound? {} {}".format(
            samples,
            avg - 2 * std,
        )
        # Increase the number of steps if size implies only a few samples
        if np.prod(const_size) < 10:
            steps_ = steps * 50
        else:
            steps_ = steps
        check_basics(
            f,
            steps_,
            const_size,
            target_avg=avg,
            target_std=target_std,
            prefix="mrg ",
            allow_01=True,
            inputs=input,
            mean_rtol=rtol,
            std_tol=std_tol,
        )
        sys.stdout.flush()
@pytest.mark.slow
def test_truncated_normal():
    """Statistical checks for ``truncated_normal`` over the same shape
    and mean cases as ``test_normal0``."""
    # just a copy of test_normal0 for truncated normal
    steps = 50
    std = 2.0
    if (
        config.mode in ("DEBUG_MODE", "DebugMode", "FAST_COMPILE")
        or config.mode == "Mode"
        and config.linker in ["py"]
    ):
        sample_size = (25, 30)
        default_rtol = 0.02
    else:
        sample_size = (999, 50)
        default_rtol = 0.01
    sample_size_odd = (sample_size[0], sample_size[1] - 1)
    x = matrix()
    # Each case: (size, const_size, var_input, input, avg, rtol, std_tol)
    test_cases = [
        (sample_size, sample_size, [], [], -5.0, default_rtol, default_rtol),
        (
            x.shape,
            sample_size,
            [x],
            [np.zeros(sample_size, dtype=config.floatX)],
            -5.0,
            default_rtol,
            default_rtol,
        ),
        # test odd value
        (
            x.shape,
            sample_size_odd,
            [x],
            [np.zeros(sample_size_odd, dtype=config.floatX)],
            -5.0,
            default_rtol,
            default_rtol,
        ),
        (
            sample_size,
            sample_size,
            [],
            [],
            np.arange(np.prod(sample_size), dtype="float32").reshape(sample_size),
            10.0 * std / np.sqrt(steps),
            default_rtol,
        ),
        # test empty size (scalar)
        ((), (), [], [], -5.0, default_rtol, 0.02),
        # test with few samples at the same time
        ((1,), (1,), [], [], -5.0, default_rtol, 0.02),
        ((3,), (3,), [], [], -5.0, default_rtol, 0.02),
    ]
    for size, const_size, var_input, input, avg, rtol, std_tol in test_cases:
        R = MRG_RandomStream(234)
        # Note: we specify `nstreams` to avoid a warning.
        n = R.truncated_normal(
            size=size,
            avg=avg,
            std=std,
            nstreams=rng_mrg.guess_n_streams(size, warn=False),
        )
        f = function(var_input, n)
        # Increase the number of steps if size implies only a few samples
        if np.prod(const_size) < 10:
            steps_ = steps * 60
        else:
            steps_ = steps
        check_basics(
            f,
            steps_,
            const_size,
            target_avg=avg,
            target_std=std,
            prefix="mrg ",
            allow_01=True,
            inputs=input,
            mean_rtol=rtol,
            std_tol=std_tol,
        )
        sys.stdout.flush()
def basic_multinomialtest(
    f, steps, sample_size, target_pvals, n_samples, prefix="", mean_rtol=0.04
):
    """Draw ``steps`` multinomial batches from ``f`` and compare their
    empirical category frequencies with ``target_pvals``.

    Each batch must have shape ``sample_size`` and every row must sum
    to exactly ``n_samples`` (a valid multinomial draw).
    """
    elapsed = 0.0
    totals = np.zeros(target_pvals.shape, dtype=config.floatX)
    for _ in range(steps):
        started = time.perf_counter()
        batch = f()
        assert batch.shape == sample_size
        # A multinomial draw distributes exactly n_samples counts per row.
        assert np.all(np.sum(batch, axis=1) == n_samples)
        elapsed += time.perf_counter() - started
        totals += batch
    totals /= steps * n_samples
    assert np.mean(abs(totals - target_pvals)) < mean_rtol
def test_multinomial():
    """Empirical frequencies of single-draw (n=1) multinomial samples
    must match the target probabilities row-wise."""
    steps = 100
    if (
        config.mode in ("DEBUG_MODE", "DebugMode", "FAST_COMPILE")
        or config.mode == "Mode"
        and config.linker in ["py"]
    ):
        sample_size = (49, 5)
    else:
        sample_size = (450, 6)
    pvals = np.asarray(np.random.uniform(size=sample_size))
    # Normalize each row so it forms a valid probability vector.
    pvals = np.apply_along_axis(lambda row: row / np.sum(row), 1, pvals)
    R = MRG_RandomStream(234)
    # Note: we specify `nstreams` to avoid a warning.
    m = R.multinomial(pvals=pvals, dtype=config.floatX, nstreams=30 * 256)
    f = function([], m)
    f()
    basic_multinomialtest(f, steps, sample_size, pvals, n_samples=1, prefix="mrg ")
def test_multinomial_n_samples():
    """Multinomial sampling with n > 1: check empirical frequencies for
    several values of ``n`` using ``basic_multinomialtest``."""
    slow_mode = config.mode in ("DEBUG_MODE", "DebugMode", "FAST_COMPILE") or (
        config.mode == "Mode" and config.linker in ["py"]
    )
    shape = (49, 5) if slow_mode else (450, 6)
    probs = np.asarray(np.random.uniform(size=shape))
    probs = np.apply_along_axis(lambda row: row / np.sum(row), 1, probs)
    stream = MRG_RandomStream(234)
    # Larger n needs fewer repetitions for comparable statistical power.
    for draws, repetitions in zip([5, 10, 100, 1000], [20, 10, 1, 1]):
        sampler = function(
            [],
            stream.multinomial(
                pvals=probs, n=draws, dtype=config.floatX, nstreams=30 * 256
            ),
        )
        basic_multinomialtest(sampler, repetitions, shape, probs, draws, prefix="mrg ")
        sys.stdout.flush()
class TestMRG:
    """Error handling of MRG_RandomStream sampling methods."""

    def test_bad_size(self):
        """Every sampler must reject shapes with zero or negative dims."""
        stream = MRG_RandomStream(234)
        bad_shapes = [
            (0, 100),
            (-1, 100),
            (1, 0),
        ]
        for shape in bad_shapes:
            for draw in (
                lambda: stream.uniform(shape),
                lambda: stream.binomial(shape),
                lambda: stream.multinomial(shape, 1, []),
                lambda: stream.normal(shape),
                lambda: stream.truncated_normal(shape),
            ):
                with pytest.raises(ValueError):
                    draw()
def test_multiple_rng_aliasing():
    """Independent streams must not share their ``state_updates`` list.

    Guards against a historical bug where ``state_updates`` was created
    as a class attribute (shared across instances) instead of being
    initialized per-instance in ``__init__``. Sharing it would alias
    random state between otherwise independent graphs.
    """
    first = MRG_RandomStream(1234)
    second = MRG_RandomStream(2392)
    assert first.state_updates is not second.state_updates
def test_random_state_transfer():
    """Random state can be transferred from one pytensor graph to
    another: after copying the rstate and every state-update shared
    variable, both graphs produce identical samples."""

    class Graph:
        def __init__(self, seed=123):
            self.rng = MRG_RandomStream(seed)
            self.y = self.rng.uniform(size=(1,))

    g1 = Graph(seed=123)
    f1 = function([], g1.y)
    g2 = Graph(seed=987)
    f2 = function([], g2.y)
    # Copy the top-level state and each per-sampler state variable.
    g2.rng.rstate = g1.rng.rstate
    for (su1, su2) in zip(g1.rng.state_updates, g2.rng.state_updates):
        su2[0].set_value(su1[0].get_value())
    np.testing.assert_array_almost_equal(f1(), f2(), decimal=6)
@pytest.mark.parametrize(
    "mode",
    [
        "FAST_RUN",
        "FAST_COMPILE",
    ],
)
def test_gradient_scan(mode):
    """Gradient through a scan that draws MRG random numbers each step.

    Checks the gradient w.r.t. the shared weight ``w`` against a
    precomputed reference value for seed 10.
    """
    pytensor_rng = MRG_RandomStream(10)
    w = shared(np.ones(1, dtype="float32"))

    def one_step(x):
        # Each scan step adds a fresh uniform draw scaled by `w`.
        return x + pytensor_rng.uniform((1,), dtype="float32") * w

    x = vector(dtype="float32")
    values, updates = scan(one_step, outputs_info=x, n_steps=10)
    gw = grad(at_sum(values[-1]), w)
    f = function([x], gw, mode=mode)
    # BUG FIX: `rtol` was 1e6 (one million), which made np.allclose pass
    # for any finite output and left the assertion vacuous.  1e-6 is the
    # intended, float32-appropriate relative tolerance.
    assert np.allclose(
        f(np.arange(1, dtype=np.float32)),
        np.array([0.13928187], dtype=np.float32),
        rtol=1e-6,
    )
def test_simple_shared_mrg_random():
    """Smoke test: a scan drawing MRG uniforms runs without errors."""
    stream = MRG_RandomStream(10)

    def draw():
        # One uniform(-1, 1) sample of shape (2,) per scan iteration.
        return stream.uniform((2,), -1, 1)

    outputs, state_updates = scan(
        draw,
        [],
        [],
        [],
        n_steps=5,
        truncate_gradient=-1,
        go_backwards=False,
    )
    runner = function([], outputs, updates=state_updates, allow_input_downcast=True)
    # Only checking that repeated evaluation does not raise.
    for _ in range(2):
        runner()
def test_multMatVect():
    """``DotModulo`` must agree with the pure-Python ``matVecModM``
    reference for two simultaneous (A @ s) mod m products."""
    A1 = lmatrix("A1")
    s1 = ivector("s1")
    m1 = iscalar("m1")
    A2 = lmatrix("A2")
    s2 = ivector("s2")
    m2 = iscalar("m2")
    g0 = rng_mrg.DotModulo()(A1, s1, m1, A2, s2, m2)
    f0 = function([A1, s1, m1, A2, s2, m2], g0)
    i32max = np.iinfo(np.int32).max
    rng = np.random.default_rng(utt.fetch_seed())
    A1 = rng.integers(0, i32max, (3, 3)).astype("int64")
    s1 = rng.integers(0, i32max, 3).astype("int32")
    m1 = np.asarray(rng.integers(i32max), dtype="int32")
    A2 = rng.integers(0, i32max, (3, 3)).astype("int64")
    s2 = rng.integers(0, i32max, 3).astype("int32")
    m2 = np.asarray(rng.integers(i32max), dtype="int32")
    # Feed the inputs through the storage cells and run the VM directly
    # instead of calling f0(...), exercising the low-level interface.
    f0.input_storage[0].storage[0] = A1
    f0.input_storage[1].storage[0] = s1
    f0.input_storage[2].storage[0] = m1
    f0.input_storage[3].storage[0] = A2
    f0.input_storage[4].storage[0] = s2
    f0.input_storage[5].storage[0] = m2
    r_a1 = rng_mrg.matVecModM(A1, s1, m1)
    r_a2 = rng_mrg.matVecModM(A2, s2, m2)
    f0.vm()
    r_b = f0.output_storage[0].value
    # The op concatenates both products: first 3 entries come from
    # (A1, s1, m1), the last 3 from (A2, s2, m2).
    assert np.allclose(r_a1, r_b[:3])
    assert np.allclose(r_a2, r_b[3:])
def test_seed_fn():
    """``MRG_RandomStream.seed`` must restore the sampling sequence when
    re-seeded with the same (or default) seed, and change it otherwise."""
    idx = ivector()
    # (new_seed, expect_same_sequence); None re-uses the original seed.
    for new_seed, same in [(234, True), (None, True), (23, False)]:
        random = MRG_RandomStream(234)
        fn1 = function([], random.uniform((2, 2), dtype="float32"))
        fn2 = function([], random.uniform((3, 3), nstreams=2, dtype="float32"))
        fn3 = function([idx], random.uniform(idx, nstreams=3, ndim=1, dtype="float32"))
        fn1_val0 = fn1()
        fn1_val1 = fn1()
        assert not np.allclose(fn1_val0, fn1_val1)
        fn2_val0 = fn2()
        fn2_val1 = fn2()
        assert not np.allclose(fn2_val0, fn2_val1)
        fn3_val0 = fn3([4])
        fn3_val1 = fn3([4])
        assert not np.allclose(fn3_val0, fn3_val1)
        assert fn1_val0.size == 4
        assert fn2_val0.size == 9
        random.seed(new_seed)
        fn1_val2 = fn1()
        fn1_val3 = fn1()
        fn2_val2 = fn2()
        fn2_val3 = fn2()
        fn3_val2 = fn3([4])
        fn3_val3 = fn3([4])
        # After re-seeding, the sequences repeat iff the seed matches.
        assert np.allclose(fn1_val0, fn1_val2) == same
        assert np.allclose(fn1_val1, fn1_val3) == same
        assert np.allclose(fn2_val0, fn2_val2) == same
        assert np.allclose(fn2_val1, fn2_val3) == same
        assert np.allclose(fn3_val0, fn3_val2) == same
        assert np.allclose(fn3_val1, fn3_val3) == same
def rng_mrg_overflow(sizes, fct, mode, should_raise_error):
    """Compile ``fct(size=size)`` for each size and run it.

    A run-time ``ValueError`` is expected for every size iff
    ``should_raise_error`` is True.
    """
    for shape in sizes:
        sampler = function([], fct(size=shape), mode=mode)
        if should_raise_error:
            with pytest.raises(ValueError):
                sampler()
        else:
            sampler()
@pytest.mark.slow
def test_overflow_cpu():
    """Sizes whose element count overflows must raise, while reasonable
    sizes (including numpy int32 sizes) must sample fine."""
    # run with PYTENSOR_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32
    rng = MRG_RandomStream(np.random.default_rng(utt.fetch_seed()).integers(1234))
    fct = rng.uniform
    with config.change_flags(compute_test_value="off"):
        # should raise error as the size overflows
        sizes = [
            (2**31,),
            (2**32,),
            (
                2**15,
                2**16,
            ),
            (2, 2**15, 2**15),
        ]
        rng_mrg_overflow(sizes, fct, config.mode, should_raise_error=True)
        # should not raise error
        sizes = [(2**5,), (2**5, 2**5), (2**5, 2**5, 2**5)]
        rng_mrg_overflow(sizes, fct, config.mode, should_raise_error=False)
        # should support int32 sizes
        sizes = [(np.int32(2**10),), (np.int32(2), np.int32(2**10), np.int32(2**10))]
        rng_mrg_overflow(sizes, fct, config.mode, should_raise_error=False)
def test_undefined_grad():
    """Gradients through MRG sampling ops must raise NullTypeGradError
    for every distribution parameter (sampling is not differentiable
    w.r.t. its parameters)."""
    srng = MRG_RandomStream(seed=1234)
    # checking uniform distribution
    low = scalar()
    out = srng.uniform((), low=low)
    with pytest.raises(NullTypeGradError):
        grad(out, low)
    high = scalar()
    out = srng.uniform((), low=0, high=high)
    with pytest.raises(NullTypeGradError):
        grad(out, high)
    out = srng.uniform((), low=low, high=high)
    with pytest.raises(NullTypeGradError):
        grad(out, (low, high))
    # checking binomial distribution
    prob = scalar()
    out = srng.binomial((), p=prob)
    with pytest.raises(NullTypeGradError):
        grad(out, prob)
    # checking multinomial distribution
    prob1 = scalar()
    prob2 = scalar()
    p = [as_tensor_variable([prob1, 0.5, 0.25])]
    out = srng.multinomial(size=None, pvals=p, n=4)[0]
    with pytest.raises(NullTypeGradError):
        grad(at_sum(out), prob1)
    p = [as_tensor_variable([prob1, prob2])]
    out = srng.multinomial(size=None, pvals=p, n=4)[0]
    with pytest.raises(NullTypeGradError):
        grad(at_sum(out), (prob1, prob2))
    # checking choice (sampling without replacement)
    p = [as_tensor_variable([prob1, prob2, 0.1, 0.2])]
    out = srng.choice(a=None, size=1, p=p, replace=False)[0]
    with pytest.raises(NullTypeGradError):
        grad(out[0], (prob1, prob2))
    p = [as_tensor_variable([prob1, prob2])]
    out = srng.choice(a=None, size=1, p=p, replace=False)[0]
    with pytest.raises(NullTypeGradError):
        grad(out[0], (prob1, prob2))
    p = [as_tensor_variable([prob1, 0.2, 0.3])]
    out = srng.choice(a=None, size=1, p=p, replace=False)[0]
    with pytest.raises(NullTypeGradError):
        grad(out[0], prob1)
    # checking normal distribution
    avg = scalar()
    out = srng.normal((), avg=avg)
    with pytest.raises(NullTypeGradError):
        grad(out, avg)
    std = scalar()
    out = srng.normal((), avg=0, std=std)
    with pytest.raises(NullTypeGradError):
        grad(out, std)
    out = srng.normal((), avg=avg, std=std)
    with pytest.raises(NullTypeGradError):
        grad(out, (avg, std))
    # checking truncated normal distribution
    avg = scalar()
    out = srng.truncated_normal((), avg=avg)
    with pytest.raises(NullTypeGradError):
        grad(out, avg)
    std = scalar()
    out = srng.truncated_normal((), avg=0, std=std)
    with pytest.raises(NullTypeGradError):
        grad(out, std)
    out = srng.truncated_normal((), avg=avg, std=std)
    with pytest.raises(NullTypeGradError):
        grad(out, (avg, std))
def test_f16_nonzero(mode=None, op_to_check=rng_mrg.mrg_uniform):
    """float16 uniform draws must stay strictly inside (0, 1)."""
    stream = MRG_RandomStream(seed=utt.fetch_seed())
    draws = stream.uniform(size=(1000, 1000), dtype="float16")
    assert draws.dtype == "float16", draws.type
    sampler = function([], draws, mode=mode)
    # The compiled graph must actually contain the expected sampling op.
    assert any(
        isinstance(node.op, op_to_check) for node in sampler.maker.fgraph.apply_nodes
    )
    sampled = sampler()
    assert np.all((0 < sampled) & (sampled < 1))
@pytest.mark.slow
def test_target_parameter():
    """Every sampler must accept ``target="cpu"`` and still return a
    concrete ndarray when compiled and run."""
    srng = MRG_RandomStream()
    pvals = np.array([[0.98, 0.01, 0.01], [0.01, 0.49, 0.50]])

    def basic_target_parameter_test(x):
        # Compile and run; only checks that the call succeeds on CPU.
        f = function([], x)
        assert isinstance(f(), np.ndarray)

    basic_target_parameter_test(srng.uniform((3, 2), target="cpu"))
    basic_target_parameter_test(srng.normal((3, 2), target="cpu"))
    basic_target_parameter_test(srng.truncated_normal((3, 2), target="cpu"))
    basic_target_parameter_test(srng.binomial((3, 2), target="cpu"))
    basic_target_parameter_test(
        srng.multinomial(pvals=pvals.astype("float32"), target="cpu")
    )
    basic_target_parameter_test(
        srng.choice(p=pvals.astype("float32"), replace=False, target="cpu")
    )
    # multinomial_wo_replacement is deprecated; expect the warning.
    with pytest.deprecated_call():
        basic_target_parameter_test(
            srng.multinomial_wo_replacement(pvals=pvals.astype("float32"), target="cpu")
        )
@config.change_flags(compute_test_value="off")
def test_undefined_grad_opt():
    """Undefined-gradient ops must be removed from the optimized graph."""
    random = MRG_RandomStream(
        np.random.default_rng(utt.fetch_seed()).integers(1, 2147462579)
    )
    pvals = shared(np.random.random((10, 20)).astype(config.floatX))
    pvals = pvals / pvals.sum(axis=1)
    # zero_grad blocks gradient flow, making downstream grads undefined.
    pvals = zero_grad(pvals)
    samples = random.multinomial(pvals=pvals, n=1)
    samples = cast(samples, pvals.dtype)
    samples = zero_grad(samples)
    cost = at_sum(samples + pvals)
    grad_out = grad(cost, samples)
    f = function([], grad_out)
    # After rewriting, no UndefinedGrad op may remain in the final graph.
    assert not any(
        isinstance(node.op, UndefinedGrad) for node in f.maker.fgraph.apply_nodes
    )
......@@ -30,10 +30,10 @@ from pytensor.gradient import (
from pytensor.graph.basic import Apply, graph_inputs
from pytensor.graph.null_type import NullType
from pytensor.graph.op import Op
from pytensor.sandbox.rng_mrg import MRG_RandomStream
from pytensor.tensor.math import add, dot, exp, sigmoid, sqr
from pytensor.tensor.math import sum as at_sum
from pytensor.tensor.math import tanh
from pytensor.tensor.random import RandomStream
from pytensor.tensor.type import (
discrete_dtypes,
dmatrix,
......@@ -956,13 +956,13 @@ def test_grad_scale():
@config.change_flags(compute_test_value="off")
def test_undefined_grad_opt():
# Make sure that undefined grad get removed in optimized graph.
random = MRG_RandomStream(np.random.default_rng().integers(1, 2147462579))
random = RandomStream(np.random.default_rng().integers(1, 2147462579))
pvals = pytensor.shared(np.random.random((10, 20)).astype(config.floatX))
pvals = pvals / pvals.sum(axis=1)
pvals = zero_grad(pvals)
samples = random.multinomial(pvals=pvals, n=1)
samples = random.multinomial(p=pvals, n=1)
samples = at.cast(samples, pvals.dtype)
samples = zero_grad(samples)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论