提交 230a8080 authored 作者: Ricardo Vieira's avatar Ricardo Vieira 提交者: Ricardo Vieira

Add functional vectorize helper to pytensor.tensor module

上级 27781606
...@@ -148,6 +148,7 @@ from pytensor.tensor.subtensor import * # noqa ...@@ -148,6 +148,7 @@ from pytensor.tensor.subtensor import * # noqa
from pytensor.tensor.type import * # noqa from pytensor.tensor.type import * # noqa
from pytensor.tensor.type_other import * # noqa from pytensor.tensor.type_other import * # noqa
from pytensor.tensor.variable import TensorConstant, TensorVariable # noqa from pytensor.tensor.variable import TensorConstant, TensorVariable # noqa
from pytensor.tensor.functional import vectorize # noqa
# Allow accessing numpy constants from pytensor.tensor # Allow accessing numpy constants from pytensor.tensor
from numpy import e, euler_gamma, inf, infty, nan, newaxis, pi # noqa from numpy import e, euler_gamma, inf, infty, nan, newaxis, pi # noqa
......
import re
from collections.abc import Sequence from collections.abc import Sequence
from typing import Any, Optional, cast from typing import Any, Optional, cast
...@@ -13,49 +12,14 @@ from pytensor.graph.replace import _vectorize_node, vectorize_graph ...@@ -13,49 +12,14 @@ from pytensor.graph.replace import _vectorize_node, vectorize_graph
from pytensor.tensor import as_tensor_variable from pytensor.tensor import as_tensor_variable
from pytensor.tensor.shape import shape_padleft from pytensor.tensor.shape import shape_padleft
from pytensor.tensor.type import continuous_dtypes, discrete_dtypes, tensor from pytensor.tensor.type import continuous_dtypes, discrete_dtypes, tensor
from pytensor.tensor.utils import broadcast_static_dim_lengths, import_func_from_string from pytensor.tensor.utils import (
_parse_gufunc_signature,
broadcast_static_dim_lengths,
import_func_from_string,
)
from pytensor.tensor.variable import TensorVariable from pytensor.tensor.variable import TensorVariable
# TODO: Implement vectorize helper to batch whole graphs (similar to what Blockwise does for the grad)
# Copied verbatim from numpy.lib.function_base
# https://github.com/numpy/numpy/blob/f2db090eb95b87d48a3318c9a3f9d38b67b0543c/numpy/lib/function_base.py#L1999-L2029
# Regex building blocks for gufunc signatures such as "(m,n),(n,p)->(m,p)".
_DIMENSION_NAME = r"\w+"  # one core-dimension label, e.g. "m"
_CORE_DIMENSION_LIST = "(?:{0:}(?:,{0:})*)?".format(_DIMENSION_NAME)  # "m,n" (may be empty)
_ARGUMENT = rf"\({_CORE_DIMENSION_LIST}\)"  # one parenthesized argument, e.g. "(m,n)"
_ARGUMENT_LIST = "{0:}(?:,{0:})*".format(_ARGUMENT)  # comma-joined arguments, e.g. "(m,n),(n,p)"
_SIGNATURE = "^{0:}->{0:}$".format(_ARGUMENT_LIST)  # full "inputs->outputs" signature


def _parse_gufunc_signature(signature):
    """
    Parse string signatures for a generalized universal function.

    Arguments
    ---------
    signature : string
        Generalized universal function signature, e.g., ``(m,n),(n,p)->(m,p)``
        for ``np.matmul``.

    Returns
    -------
    Tuple of input and output core dimensions parsed from the signature, each
    of the form List[Tuple[str, ...]].
    """
    # Whitespace is insignificant in gufunc signatures; strip it before validating.
    signature = re.sub(r"\s+", "", signature)
    if not re.match(_SIGNATURE, signature):
        raise ValueError(f"not a valid gufunc signature: {signature}")
    # "->" separates inputs from outputs; each side becomes a list with one
    # tuple of dimension names per parenthesized argument.
    return tuple(
        [
            tuple(re.findall(_DIMENSION_NAME, arg))
            for arg in re.findall(_ARGUMENT, arg_list)
        ]
        for arg_list in signature.split("->")
    )
def safe_signature( def safe_signature(
core_inputs: Sequence[Variable], core_inputs: Sequence[Variable],
core_outputs: Sequence[Variable], core_outputs: Sequence[Variable],
......
from typing import Callable, Optional
from pytensor.graph import vectorize_graph
from pytensor.tensor import TensorVariable
from pytensor.tensor.utils import _parse_gufunc_signature
def vectorize(func: Callable, signature: Optional[str] = None) -> Callable:
    """Create a vectorized version of a python function that takes TensorVariables as inputs and outputs.

    Similar to numpy.vectorize. See respective docstrings for more details.

    Parameters
    ----------
    func: Callable
        Function that creates the desired outputs from TensorVariable inputs with the core dimensions.
    signature: str, optional
        Generalized universal function signature, e.g., (m,n),(n)->(m) for vectorized matrix-vector multiplication.
        If not provided, it is assumed all inputs have scalar core dimensions. Unlike numpy, the outputs
        can have arbitrary shapes when the signature is not provided.

    Returns
    -------
    vectorized_func: Callable
        Callable that takes TensorVariables with arbitrarily batched dimensions on the left
        and returns variables whose graphs correspond to the vectorized expressions of func.

    Raises
    ------
    ValueError
        If ``signature`` is provided but is not a valid gufunc signature.
        Raised immediately (fail fast), not on the first call of the returned function.

    Notes
    -----
    Unlike numpy.vectorize, the equality of core dimensions implied by the signature is not explicitly asserted.

    To vectorize an existing graph, use `pytensor.graph.replace.vectorize_graph` instead.

    Examples
    --------
    .. code-block:: python

        import pytensor
        import pytensor.tensor as pt

        def func(x):
            return pt.exp(x) / pt.sum(pt.exp(x))

        vec_func = pt.vectorize(func, signature="(a)->(a)")

        x = pt.matrix("x")
        y = vec_func(x)

        fn = pytensor.function([x], y)
        fn([[0, 1, 2], [2, 1, 0]])
        # array([[0.09003057, 0.24472847, 0.66524096],
        #        [0.66524096, 0.24472847, 0.09003057]])

    .. code-block:: python

        import pytensor
        import pytensor.tensor as pt

        def func(x):
            return x[0], x[-1]

        vec_func = pt.vectorize(func, signature="(a)->(),()")

        x = pt.matrix("x")
        y1, y2 = vec_func(x)

        fn = pytensor.function([x], [y1, y2])
        fn([[-10, 0, 10], [-11, 0, 11]])
        # [array([-10., -11.]), array([10., 11.])]

    """
    from functools import wraps

    # Parse the signature once, up front: an invalid signature errors here at
    # `vectorize(...)` time, and repeated calls of the returned function do not
    # re-parse the same string.
    parsed_signature = (
        None if signature is None else _parse_gufunc_signature(signature)
    )

    @wraps(func)  # preserve func's __name__/__doc__ on the returned callable
    def inner(*inputs):
        if parsed_signature is None:
            # Assume all inputs have scalar core dimensions
            inputs_sig = [()] * len(inputs)
        else:
            inputs_sig, outputs_sig = parsed_signature
            if len(inputs) != len(inputs_sig):
                raise ValueError(
                    f"Number of inputs does not match signature: {signature}"
                )

        # Create dummy core inputs by stripping the batched dimensions of inputs
        core_inputs = []
        for input, input_sig in zip(inputs, inputs_sig):
            if not isinstance(input, TensorVariable):
                raise TypeError(
                    f"Inputs to vectorize function must be TensorVariable, got {type(input)}"
                )

            if input.ndim < len(input_sig):
                raise ValueError(
                    f"Input {input} has less dimensions than signature {input_sig}"
                )
            # Keep only the trailing (core) dims; batch dims on the left are stripped.
            if len(input_sig):
                core_shape = input.type.shape[-len(input_sig) :]
            else:
                core_shape = ()

            core_input = input.type.clone(shape=core_shape)(name=input.name)
            core_inputs.append(core_input)

        # Call function on dummy core inputs
        core_outputs = func(*core_inputs)
        if core_outputs is None:
            raise ValueError("vectorize function returned no outputs")

        if parsed_signature is not None:
            if isinstance(core_outputs, (list, tuple)):
                n_core_outputs = len(core_outputs)
            else:
                n_core_outputs = 1
            if n_core_outputs != len(outputs_sig):
                raise ValueError(
                    f"Number of outputs does not match signature: {signature}"
                )

        # Vectorize graph by replacing dummy core inputs by original inputs
        outputs = vectorize_graph(core_outputs, replace=dict(zip(core_inputs, inputs)))

        return outputs

    return inner
import re
from collections.abc import Sequence from collections.abc import Sequence
from typing import Union from typing import Union
...@@ -161,3 +162,40 @@ def broadcast_static_dim_lengths( ...@@ -161,3 +162,40 @@ def broadcast_static_dim_lengths(
if len(dim_lengths_set) > 1: if len(dim_lengths_set) > 1:
raise ValueError raise ValueError
return tuple(dim_lengths_set)[0] return tuple(dim_lengths_set)[0]
# Copied verbatim from numpy.lib.function_base
# https://github.com/numpy/numpy/blob/f2db090eb95b87d48a3318c9a3f9d38b67b0543c/numpy/lib/function_base.py#L1999-L2029
_DIMENSION_NAME = r"\w+"
_CORE_DIMENSION_LIST = "(?:{0:}(?:,{0:})*)?".format(_DIMENSION_NAME)
_ARGUMENT = rf"\({_CORE_DIMENSION_LIST}\)"
_ARGUMENT_LIST = "{0:}(?:,{0:})*".format(_ARGUMENT)
_SIGNATURE = "^{0:}->{0:}$".format(_ARGUMENT_LIST)
def _parse_gufunc_signature(signature):
"""
Parse string signatures for a generalized universal function.
Arguments
---------
signature : string
Generalized universal function signature, e.g., ``(m,n),(n,p)->(m,p)``
for ``np.matmul``.
Returns
-------
Tuple of input and output core dimensions parsed from the signature, each
of the form List[Tuple[str, ...]].
"""
signature = re.sub(r"\s+", "", signature)
if not re.match(_SIGNATURE, signature):
raise ValueError(f"not a valid gufunc signature: {signature}")
return tuple(
[
tuple(re.findall(_DIMENSION_NAME, arg))
for arg in re.findall(_ARGUMENT, arg_list)
]
for arg_list in signature.split("->")
)
...@@ -10,9 +10,10 @@ from pytensor.gradient import grad ...@@ -10,9 +10,10 @@ from pytensor.gradient import grad
from pytensor.graph import Apply, Op from pytensor.graph import Apply, Op
from pytensor.graph.replace import vectorize_node from pytensor.graph.replace import vectorize_node
from pytensor.tensor import diagonal, log, tensor from pytensor.tensor import diagonal, log, tensor
from pytensor.tensor.blockwise import Blockwise, _parse_gufunc_signature from pytensor.tensor.blockwise import Blockwise
from pytensor.tensor.nlinalg import MatrixInverse from pytensor.tensor.nlinalg import MatrixInverse
from pytensor.tensor.slinalg import Cholesky, Solve, cholesky, solve_triangular from pytensor.tensor.slinalg import Cholesky, Solve, cholesky, solve_triangular
from pytensor.tensor.utils import _parse_gufunc_signature
def test_vectorize_blockwise(): def test_vectorize_blockwise():
......
import numpy as np
import pytest
from pytensor.graph.basic import equal_computations
from pytensor.tensor import full, tensor
from pytensor.tensor.functional import vectorize
from pytensor.tensor.random.type import RandomGeneratorType
class TestVectorize:
    def test_vectorize_no_signature(self):
        """Unlike numpy we don't assume outputs of vectorize without signature are scalar."""

        def core_fn(x):
            return full((5, 3), x)

        vec_fn = vectorize(core_fn)

        x = tensor("x", shape=(4,), dtype="float64")
        result = vec_fn(x)
        # Each scalar core input produces a (5, 3) output; one batch dim -> ndim 3.
        assert result.type.ndim == 3

        x_val = np.array([1, 2, 3, 4])
        expected = np.full((len(x_val), 5, 3), x_val[:, None, None])
        np.testing.assert_allclose(result.eval({x: x_val}), expected)

    def test_vectorize_outer_product(self):
        def core_fn(x, y):
            return x[:, None] * y[None, :]

        vec_fn = vectorize(core_fn, signature="(a),(b)->(a,b)")

        x = tensor("x", shape=(2, 3, 5))
        y = tensor("y", shape=(2, 3, 7))
        result = vec_fn(x, y)

        assert result.type.shape == (2, 3, 5, 7)
        # Vectorized graph must match the directly-broadcast expression.
        assert equal_computations([result], [x[..., :, None] * y[..., None, :]])

    def test_vectorize_outer_inner_product(self):
        def core_fn(x, y):
            return x[:, None] * y[None, :], (x * y).sum()

        vec_fn = vectorize(core_fn, signature="(a),(b)->(a,b),()")

        x = tensor("x", shape=(2, 3, 5))
        y = tensor("y", shape=(2, 3, 5))
        outer, inner = vec_fn(x, y)

        assert outer.type.shape == (2, 3, 5, 5)
        assert inner.type.shape == (2, 3)
        assert equal_computations([outer], [x[..., :, None] * y[..., None, :]])
        assert equal_computations([inner], [(x * y).sum(axis=-1)])

    def test_errors(self):
        def core_fn(x, y):
            return x + y, x - y

        x = tensor("x", shape=(5,))
        y = tensor("y", shape=())

        # Too few call arguments for a two-input signature.
        with pytest.raises(ValueError, match="Number of inputs"):
            vectorize(core_fn, signature="(),()->()")(x)

        # core_fn returns two outputs but the signature declares one.
        with pytest.raises(ValueError, match="Number of outputs"):
            vectorize(core_fn, signature="(),()->()")(x, y)

        # y is scalar but the signature requires one core dimension.
        with pytest.raises(ValueError, match="Input y has less dimensions"):
            vectorize(core_fn, signature="(a),(a)->(a),(a)")(x, y)

        # Non-tensor inputs are rejected.
        bad_input = RandomGeneratorType()
        with pytest.raises(TypeError, match="must be TensorVariable"):
            vectorize(core_fn)(bad_input, x)

        # A function without a return value cannot be vectorized.
        def no_output_fn(x, y):
            x + y

        with pytest.raises(ValueError, match="no outputs"):
            vectorize(no_output_fn)(x, y)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论