提交 4b897162 authored 作者: jessegrabowski's avatar jessegrabowski 提交者: Ricardo Vieira

Implement `Pack` and `Unpack`

上级 a0be97e8
from collections.abc import Sequence
from collections.abc import Iterable, Sequence
from itertools import pairwise
from typing import cast as type_cast
import numpy as np
......@@ -9,7 +10,7 @@ from pytensor.graph import Apply
from pytensor.graph.op import Op
from pytensor.graph.replace import _vectorize_node
from pytensor.tensor import TensorLike, as_tensor_variable
from pytensor.tensor.basic import infer_static_shape
from pytensor.tensor.basic import expand_dims, infer_static_shape, join, split
from pytensor.tensor.math import prod
from pytensor.tensor.shape import ShapeValueType
from pytensor.tensor.type import tensor
......@@ -152,12 +153,12 @@ class SplitDims(Op):
self.axis = axis
def make_node(self, x: Variable, shape: Variable) -> Apply: # type: ignore[override]
if shape.type.numpy_dtype.kind not in "iu":
raise TypeError("shape must be an integer tensor")
x = as_tensor_variable(x)
shape = as_tensor_variable(shape, dtype=int, ndim=1)
if shape.type.numpy_dtype.kind not in "iu":
raise TypeError("shape must be an integer tensor")
axis = self.axis
_, constant_shape = infer_static_shape(shape)
......@@ -261,4 +262,263 @@ def split_dims(
return type_cast(TensorVariable, split_op(x, shape))
__all__ = ["join_dims", "split_dims"]
def _analyze_axes_list(axes) -> tuple[int, int, int]:
"""
Analyze the provided axes list to determine how many axes are before and after the interval to be raveled, as
well as the minimum and maximum number of axes that the inputs can have.
The rules are:
- Axes must be strictly increasing in both the positive and negative parts of the list.
- Negative axes must come after positive axes.
- There can be at most one "hole" in the axes list, which can be either an implicit hole on an endpoint
(e.g. [0, 1]) or an explicit hole in the middle (e.g. [0, 2] or [1, -1]).
Returns
-------
n_axes_before: int
The number of axes before the interval to be raveled.
n_axes_after: int
The number of axes after the interval to be raveled.
min_axes: int
The minimum number of axes that the inputs must have.
"""
if axes is None:
return 0, 0, 0
if isinstance(axes, int):
axes = (axes,)
elif not isinstance(axes, Iterable):
raise TypeError("axes must be an int, an iterable of ints, or None")
axes = tuple(axes)
if len(axes) == 0:
raise ValueError("axes=[] is ambiguous; use None to ravel all")
if len(set(axes)) != len(axes):
raise ValueError("axes must have no duplicates")
first_negative_idx = next((i for i, a in enumerate(axes) if a < 0), len(axes))
positive_axes = list(axes[:first_negative_idx])
negative_axes = list(axes[first_negative_idx:])
if not all(a < 0 for a in negative_axes):
raise ValueError("Negative axes must come after positive")
def not_strictly_increasing(s):
if len(s) < 1:
return False
return any(b <= a for a, b in pairwise(s))
if not_strictly_increasing(positive_axes):
raise ValueError("Axes must be strictly increasing in the positive part")
if not_strictly_increasing(negative_axes):
raise ValueError("Axes must be strictly increasing in the negative part")
def find_gaps(s):
"""Find if there are gaps in a strictly increasing sequence."""
return any(b - a > 1 for a, b in pairwise(s))
if find_gaps(positive_axes):
raise ValueError("Positive axes must be contiguous")
if find_gaps(negative_axes):
raise ValueError("Negative axes must be contiguous")
if positive_axes and positive_axes[0] != 0:
raise ValueError(
"If positive axes are provided, the first positive axis must be 0 to avoid ambiguity. To ravel indices "
"starting from the front, use negative axes only."
)
if negative_axes and negative_axes[-1] != -1:
raise ValueError(
"If negative axes are provided, the last negative axis must be -1 to avoid ambiguity. To ravel indices "
"up to the end, use positive axes only."
)
n_before = len(positive_axes)
n_after = len(negative_axes)
min_axes = n_before + n_after
return n_before, n_after, min_axes
def pack(
    *tensors: TensorLike, axes: Sequence[int] | int | None = None
) -> tuple[TensorVariable, list[ShapeValueType]]:
    """
    Join several tensors into one by keeping the requested axes and flattening all remaining axes into one.

    Every input is reshaped to ``(*leading_kept, -1, *trailing_kept)`` and the results are joined along the
    flattened axis. The shapes that were flattened away are returned so that the operation can be reversed.

    Parameters
    ----------
    *tensors : TensorLike
        The tensors to pack. Each is converted with ``as_tensor_variable``.
    axes : int, sequence of int, or None, optional
        The axes to keep. With the default of None every axis is flattened. The accepted forms are described in
        the Notes section.

    Returns
    -------
    packed_tensor : TensorVariable
        The joined tensor, with the kept axes intact and a single axis holding all flattened entries.
    packed_shapes : list of ShapeValueType
        One entry per input: the (symbolic) shape of the axes of that input that were flattened.

    Raises
    ------
    ValueError
        If an input has fewer dimensions than the number of kept axes, or if ``axes`` is rejected by
        ``_analyze_axes_list``.

    Notes
    -----
    ``axes`` is validated by ``_analyze_axes_list``. The accepted forms are:

    - None: flatten everything.
    - A single integer, which must be 0 (keep the first axis) or -1 (keep the last axis).
    - A sequence of non-negative axes that is contiguous and starts at 0.
    - A sequence of negative axes that is contiguous and ends at -1.
    - A non-negative run followed by a negative run, each obeying the rule above, so that both 0 and -1 are
      included.

    An input with exactly as many dimensions as there are kept axes has nothing to flatten; a new axis of length
    one is inserted for it at the packing position.

    Examples
    --------
    With ``axes=None`` every input is flattened, which matches ``join(0, *[t.ravel() for t in tensors])``:

    .. code-block:: python

        import pytensor.tensor as pt

        x = pt.tensor("x", shape=(2, 3))
        y = pt.tensor("y", shape=(4, 5, 6))
        packed_tensor, packed_shapes = pt.pack(x, y, axes=None)
        # packed_tensor has shape (6 + 120,) == (126,)
        # packed_shapes is [(2, 3), (4, 5, 6)]

    Keeping the first axis. All inputs must agree in size along a kept axis:

    .. code-block:: python

        import pytensor.tensor as pt

        x = pt.tensor("x", shape=(2, 3))
        y = pt.tensor("y", shape=(2, 5, 6))
        packed_tensor, packed_shapes = pt.pack(x, y, axes=0)
        # packed_tensor has shape (2, 3 + 30) == (2, 33)
        # packed_shapes is [(3,), (5, 6)]

    Keeping the last two axes with negative indices:

    .. code-block:: python

        import pytensor.tensor as pt

        x = pt.tensor("x", shape=(4, 2, 3))
        y = pt.tensor("y", shape=(5, 2, 3))
        packed_tensor, packed_shapes = pt.pack(x, y, axes=(-2, -1))
        # packed_tensor has shape (4 + 5, 2, 3) == (9, 2, 3)
        # packed_shapes is [(4,), (5,)]

    Keeping the first and the last axis by mixing both kinds of index:

    .. code-block:: python

        import pytensor.tensor as pt

        x = pt.tensor("x", shape=(2, 4, 3))
        y = pt.tensor("y", shape=(2, 5, 3))
        packed_tensor, packed_shapes = pt.pack(x, y, axes=(0, -1))
        # packed_tensor has shape (2, 4 + 5, 3) == (2, 9, 3)
        # packed_shapes is [(4,), (5,)]
    """
    inputs = [as_tensor_variable(t) for t in tensors]

    n_before, n_after, min_axes = _analyze_axes_list(axes)

    to_join: list[TensorVariable] = []
    packed_shapes: list[ShapeValueType] = []

    for position, var in enumerate(inputs):
        ndim = var.ndim

        if ndim < min_axes:
            raise ValueError(
                f"Input {position} (zero indexed) to pack has {ndim} dimensions, "
                f"but axes={axes} assumes at least {min_axes} dimension{'s' if min_axes != 1 else ''}."
            )

        # Axes in [n_before, ravel_stop) are the ones that get flattened for this input.
        ravel_stop = ndim - n_after
        packed_shapes.append(var.shape[n_before:ravel_stop])

        if ndim == min_axes:
            # Nothing to flatten: the input consists of kept axes only, so give it a length-one axis at the
            # position where the other inputs carry their flattened axis.
            to_join.append(expand_dims(var, axis=n_before))
        else:
            # Collapse the contiguous block of non-kept axes into a single axis, giving the shape
            # (shape[:n_before], -1, shape[ravel_stop:]).
            to_join.append(join_dims(var, tuple(range(n_before, ravel_stop))))

    return join(n_before, *to_join), packed_shapes
def unpack(
    packed_input: TensorLike,
    axes: int | Sequence[int] | None,
    packed_shapes: list[ShapeValueType],
) -> list[TensorVariable]:
    """
    Unpack a packed tensor into multiple tensors by splitting along the packed axis and reshaping.

    The unpacking process reverses the packing operation. `axes` corresponds to the axes that were preserved during
    packing, and `packed_shapes` contains the shapes of the raveled dimensions for each output tensor (that is, the
    shapes that were destroyed during packing).

    The signature of unpack is such that the same `axes` should be passed to both `pack` and `unpack` to create a
    "round-trip" operation. For details on the rules for `axes`, see the documentation for `pack`.

    Parameters
    ----------
    packed_input : TensorLike
        The packed tensor to be unpacked.
    axes : int, sequence of int, or None
        Axes that were preserved during packing. If None, the input must be 1D and is split along axis 0.
    packed_shapes : list of ShapeValueType
        A list containing the shapes of the raveled dimensions for each output tensor.

    Returns
    -------
    unpacked_tensors : list of TensorVariable
        One tensor per entry of `packed_shapes`, each with its packed axis expanded back to the given shape.

    Raises
    ------
    ValueError
        If `axes` is None and `packed_input` is not 1D, or if `axes` does not leave exactly one axis of
        `packed_input` unaccounted for.
    """
    packed_input = as_tensor_variable(packed_input)

    if axes is None:
        if packed_input.ndim != 1:
            raise ValueError(
                "unpack can only be called with axes=None for 1d inputs"
            )
        split_axis = 0
    else:
        axes = normalize_axis_tuple(axes, ndim=packed_input.ndim)
        try:
            # The packed axis is the only axis of the input that is not listed in ``axes``. The single-element
            # unpacking fails with a ValueError if there are zero or several such axes.
            [split_axis] = (i for i in range(packed_input.ndim) if i not in axes)
        except ValueError as err:
            raise ValueError(
                "Unpack must have exactly one more dimension than implied by axes"
            ) from err

    # Each output occupies as many entries along the packed axis as its raveled shape has elements.
    split_inputs = split(
        packed_input,
        splits_size=[prod(shape, dtype=int) for shape in packed_shapes],
        n_splits=len(packed_shapes),
        axis=split_axis,
    )

    return [
        split_dims(inp, shape, split_axis)
        for inp, shape in zip(split_inputs, packed_shapes, strict=True)
    ]
# Names exported by a star-import of this module.
__all__ = ["join_dims", "pack", "split_dims", "unpack"]
import numpy as np
import pytest
import pytensor
from pytensor import config, function
from pytensor import tensor as pt
from pytensor.graph import vectorize_graph
from pytensor.graph import rewrite_graph, vectorize_graph
from pytensor.tensor.reshape import (
_analyze_axes_list,
join_dims,
pack,
split_dims,
unpack,
)
......@@ -95,3 +99,187 @@ def test_split_size_zero_shape():
x_split_value = fn(x_value)
np.testing.assert_allclose(x_split_value, x_value.squeeze(0))
def test_make_replacements_with_pack_unpack():
    """Replace three separate inputs of a graph with slices of one flat vector built by pack/unpack."""
    rng = np.random.default_rng()

    x = pt.tensor("x", shape=())
    y = pt.tensor("y", shape=(5,))
    z = pt.tensor("z", shape=(3, 3))

    loss = (x + y.sum() + z.sum()) ** 2

    flat_packed, packed_shapes = pack(x, y, z, axes=None)
    # A fresh variable of the packed type becomes the single input of the rewritten graph.
    new_input = flat_packed.type()
    new_outputs = unpack(new_input, axes=None, packed_shapes=packed_shapes)

    loss = pytensor.graph.graph_replace(
        loss, dict(zip([x, y, z], new_outputs, strict=True))
    )
    # Compile the graph returned by rewrite_graph instead of discarding it and relying on the original variable
    # having been mutated in place.
    # NOTE(review): presumably the rewrite is needed to drop the symbolic shapes of x, y and z from the graph so
    # that `new_input` is the only required input; confirm.
    loss = rewrite_graph(loss, include=("ShapeOpt", "specialize"))

    fn = pytensor.function([new_input], loss, mode="FAST_COMPILE")

    input_vals = [
        rng.normal(size=(var.type.shape)).astype(config.floatX) for var in [x, y, z]
    ]
    flat_inputs = np.concatenate([value.ravel() for value in input_vals], axis=0)
    output_val = fn(flat_inputs)

    assert np.allclose(output_val, sum([value.sum() for value in input_vals]) ** 2)
class TestPack:
    """Tests for ``pack``, ``unpack`` and the ``_analyze_axes_list`` helper."""

    @pytest.mark.parametrize(
        "axes, expected",
        [
            (None, [0, 0, 0]),  # '*'
            ([0, 1], [2, 0, 2]),  # 'i j *'
            ([-1], [0, 1, 1]),  # '* k'
            ([-2, -1], [0, 2, 2]),  # '* i j'
            ([0, -1], [1, 1, 2]),  # 'i * k'
            ([0, 1, 2, -1], [3, 1, 4]),  # 'i j k * l'
        ],
        ids=[
            "ravel_all",
            "keep_first_two",
            "keep_last",
            "ravel_start",
            "first_and_last",
            "complex_case",
        ],
    )
    def test_analyze_axes_list_valid(self, axes, expected):
        """Each valid axes pattern yields the expected (n_before, n_after, min_axes) triple."""
        labels = ("n_before", "n_after", "min_axes")
        results = _analyze_axes_list(axes)

        for actual, wanted, label in zip(results, expected, labels, strict=True):
            assert actual == wanted, f"Expected {wanted}, got {actual} for {label}"

    def test_analyze_axes_list_invalid(self):
        """Each malformed axes pattern is rejected with the matching ValueError message."""
        invalid_cases = [
            # Positive only but not contiguous
            ([1, 3], "Positive axes must be contiguous"),
            # Negative only but not contiguous
            ([-3, -1], "Negative axes must be contiguous"),
            # Mixed up positive and negative
            ([0, 1, -2, 4], "Negative axes must come after positive"),
            # Duplicate axes
            ([0, 0], "axes must have no duplicates"),
            # Not monotonic
            ([0, 2, 1], "Axes must be strictly increasing"),
            # Negative before positive
            ([-1, 0], "Negative axes must come after positive"),
        ]

        for bad_axes, message in invalid_cases:
            with pytest.raises(ValueError, match=message):
                _analyze_axes_list(bad_axes)

    def test_pack_basic(self):
        """Check static output shapes, returned packed shapes and error cases of ``pack``."""

        def zeros_for(*variables):
            # Zero-filled test values keyed by variable name, matching each variable's static shape.
            return {
                var.name: np.zeros(var.type.shape, dtype=config.floatX)
                for var in variables
            }

        def check_packed_shapes(variables, shapes, feed, n_kept, raveled):
            # Every packed shape is a vector with one entry per raveled axis, and evaluates to the raveled part
            # of the corresponding static shape.
            for var, shape_var in zip(variables, shapes):
                assert shape_var.type.shape == (var.ndim - n_kept,)
                np.testing.assert_allclose(
                    shape_var.eval(feed, on_unused_input="ignore"),
                    var.type.shape[raveled],
                )

        x = pt.tensor("x", shape=())
        y = pt.tensor("y", shape=(5,))
        z = pt.tensor("z", shape=(3, 3))
        feed = zeros_for(x, y, z)

        # Simple case, reduce all axes, equivalent to einops '*'
        packed_tensor, packed_shapes = pack(x, y, z, axes=None)
        assert packed_tensor.type.shape == (15,)
        check_packed_shapes([x, y, z], packed_shapes, feed, 0, slice(None))

        # Preserving an axis requires every input to have at least one dimension; the scalar x does not.
        with pytest.raises(
            ValueError,
            match=r"Input 0 \(zero indexed\) to pack has 0 dimensions, but axes=0 assumes at least 1 dimension\.",
        ):
            pack(x, y, z, axes=0)

        # With a 1d x the inputs are accepted dimension-wise, but their sizes along the preserved axis disagree.
        x = pt.tensor("x", shape=(3,))
        feed["x"] = np.zeros((3,), dtype=config.floatX)
        with pytest.raises(
            ValueError,
            match=r"all input array dimensions other than the specified `axis` \(1\) must match exactly, or be unknown "
            r"\(None\), but along dimension 0, the inputs shapes are incompatible: \[3 5 3\]",
        ):
            packed_tensor, packed_shapes = pack(x, y, z, axes=0)
            packed_tensor.eval(feed)

        # Valid case, preserve first axis, equivalent to einops 'i *'
        y = pt.tensor("y", shape=(3, 5))
        z = pt.tensor("z", shape=(3, 3, 3))
        packed_tensor, packed_shapes = pack(x, y, z, axes=0)
        feed = zeros_for(x, y, z)
        assert packed_tensor.type.shape == (3, 15)
        check_packed_shapes([x, y, z], packed_shapes, feed, 1, slice(1, None))

        # Axes with a gap in the positive part are rejected.
        x = pt.tensor("x", shape=(3, 2))
        y = pt.tensor("y", shape=(3, 5, 2))
        z = pt.tensor("z", shape=(3, 1, 7, 5, 2))
        with pytest.raises(
            ValueError,
            match=r"Positive axes must be contiguous",
        ):
            pack(x, y, z, axes=[0, 3])

        # Preserve the first and last axes, equivalent to einops 'i * k'
        z = pt.tensor("z", shape=(3, 1, 7, 2))
        packed_tensor, packed_shapes = pack(x, y, z, axes=[0, -1])
        feed = zeros_for(x, y, z)
        assert packed_tensor.type.shape == (3, 13, 2)
        check_packed_shapes([x, y, z], packed_shapes, feed, 2, slice(1, -1))

    @pytest.mark.parametrize("axes", [-1])
    def test_pack_unpack_round_trip(self, axes):
        """Packing and then unpacking with the same axes returns the original values."""
        rng = np.random.default_rng()

        variables = [
            pt.tensor("x", shape=(3, 5)),
            pt.tensor("y", shape=(3, 3, 5)),
            pt.tensor("z", shape=(1, 3, 5)),
        ]

        packed, packed_shapes = pack(*variables, axes=axes)
        unpacked = unpack(packed, axes=axes, packed_shapes=packed_shapes)

        fn = pytensor.function(variables, unpacked, mode="FAST_COMPILE")

        feed = {
            var.name: rng.normal(size=var.type.shape).astype(config.floatX)
            for var in variables
        }
        results = fn(**feed)

        for given, returned in zip(feed.values(), results, strict=True):
            np.testing.assert_allclose(given, returned)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论