提交 3bf15cac authored 作者: ricardoV94's avatar ricardoV94 提交者: Ricardo Vieira

Implement index for XTensorVariables

上级 42936163
......@@ -4,7 +4,6 @@ import pytensor.xtensor.rewriting
from pytensor.xtensor import linalg
from pytensor.xtensor.shape import concat
from pytensor.xtensor.type import (
XTensorType,
as_xtensor,
xtensor,
xtensor_constant,
......
# HERE LIE DRAGONS
# Useful links to make sense of all the numpy/xarray complexity
# https://numpy.org/devdocs//user/basics.indexing.html
# https://numpy.org/neps/nep-0021-advanced-indexing.html
# https://docs.xarray.dev/en/latest/user-guide/indexing.html
# https://tutorial.xarray.dev/intermediate/indexing/advanced-indexing.html
from pytensor.graph.basic import Apply, Constant, Variable
from pytensor.scalar.basic import discrete_dtypes
from pytensor.tensor.basic import as_tensor
from pytensor.tensor.type_other import NoneTypeT, SliceType, make_slice
from pytensor.xtensor.basic import XOp, xtensor_from_tensor
from pytensor.xtensor.type import XTensorType, as_xtensor, xtensor
def as_idx_variable(idx, indexed_dim: str):
if idx is None or (isinstance(idx, Variable) and isinstance(idx.type, NoneTypeT)):
raise TypeError(
"XTensors do not support indexing with None (np.newaxis), use expand_dims instead"
)
if isinstance(idx, slice):
idx = make_slice(idx)
elif isinstance(idx, Variable) and isinstance(idx.type, SliceType):
pass
elif (
isinstance(idx, tuple)
and len(idx) == 2
and (
isinstance(idx[0], str)
or (
isinstance(idx[0], tuple | list)
and all(isinstance(d, str) for d in idx[0])
)
)
):
# Special case for ("x", array) that xarray supports
dim, idx = idx
if isinstance(idx, Variable) and isinstance(idx.type, XTensorType):
raise IndexError(
f"Giving a dimension name to an XTensorVariable indexer is not supported: {(dim, idx)}. "
"Use .rename() instead."
)
if isinstance(dim, str):
dims = (dim,)
else:
dims = tuple(dim)
idx = as_xtensor(as_tensor(idx), dims=dims)
else:
# Must be integer / boolean indices, we already counted for None and slices
try:
idx = as_xtensor(idx)
except TypeError:
idx = as_tensor(idx)
if idx.type.ndim > 1:
# Same error that xarray raises
raise IndexError(
"Unlabeled multi-dimensional array cannot be used for indexing"
)
# This is implicitly an XTensorVariable with dim matching the indexed one
idx = xtensor_from_tensor(idx, dims=(indexed_dim,)[: idx.type.ndim])
if idx.type.dtype == "bool":
if idx.type.ndim != 1:
# xarray allaws `x[True]`, but I think it is a bug: https://github.com/pydata/xarray/issues/10379
# Otherwise, it is always restricted to 1d boolean indexing arrays
raise NotImplementedError(
"Only 1d boolean indexing arrays are supported"
)
if idx.type.dims != (indexed_dim,):
raise IndexError(
"Boolean indexer should be unlabeled or on the same dimension to the indexed array. "
f"Indexer is on {idx.type.dims} but the target dimension is {indexed_dim}."
)
# Convert to nonzero indices
idx = as_xtensor(idx.values.nonzero()[0], dims=idx.type.dims)
elif idx.type.dtype not in discrete_dtypes:
raise TypeError("Numerical indices must be integers or boolean")
return idx
def get_static_slice_length(slc: Variable, dim_length: None | int) -> int | None:
if dim_length is None:
return None
if isinstance(slc, Constant):
d = slc.data
start, stop, step = d.start, d.stop, d.step
elif slc.owner is None:
# It's a root variable no way of knowing what we're getting
return None
else:
# It's a MakeSliceOp
start, stop, step = slc.owner.inputs
if isinstance(start, Constant):
start = start.data
else:
return None
if isinstance(stop, Constant):
stop = stop.data
else:
return None
if isinstance(step, Constant):
step = step.data
else:
return None
return len(range(*slice(start, stop, step).indices(dim_length)))
class Index(XOp):
__props__ = ()
def make_node(self, x, *idxs):
x = as_xtensor(x)
if any(idx is Ellipsis for idx in idxs):
if idxs.count(Ellipsis) > 1:
raise IndexError("an index can only have a single ellipsis ('...')")
# Convert intermediate Ellipsis to slice(None)
ellipsis_loc = idxs.index(Ellipsis)
n_implied_none_slices = x.type.ndim - (len(idxs) - 1)
idxs = (
*idxs[:ellipsis_loc],
*((slice(None),) * n_implied_none_slices),
*idxs[ellipsis_loc + 1 :],
)
x_ndim = x.type.ndim
x_dims = x.type.dims
x_shape = x.type.shape
out_dims = []
out_shape = []
def combine_dim_info(idx_dim, idx_dim_shape):
if idx_dim not in out_dims:
# First information about the dimension length
out_dims.append(idx_dim)
out_shape.append(idx_dim_shape)
else:
# Dim already introduced in output by a previous index
# Update static shape or raise if incompatible
out_dim_pos = out_dims.index(idx_dim)
out_dim_shape = out_shape[out_dim_pos]
if out_dim_shape is None:
# We don't know the size of the dimension yet
out_shape[out_dim_pos] = idx_dim_shape
elif idx_dim_shape is not None and idx_dim_shape != out_dim_shape:
raise IndexError(
f"Dimension of indexers mismatch for dim {idx_dim}"
)
if len(idxs) > x_ndim:
raise IndexError("Too many indices")
idxs = [
as_idx_variable(idx, dim) for idx, dim in zip(idxs, x_dims, strict=False)
]
for i, idx in enumerate(idxs):
if isinstance(idx.type, SliceType):
idx_dim = x_dims[i]
idx_dim_shape = get_static_slice_length(idx, x_shape[i])
combine_dim_info(idx_dim, idx_dim_shape)
else:
if idx.type.ndim == 0:
# Scalar index, dimension is dropped
continue
assert isinstance(idx.type, XTensorType)
idx_dims = idx.type.dims
for idx_dim in idx_dims:
idx_dim_shape = idx.type.shape[idx_dims.index(idx_dim)]
combine_dim_info(idx_dim, idx_dim_shape)
for dim_i, shape_i in zip(x_dims[i + 1 :], x_shape[i + 1 :]):
# Add back any unindexed dimensions
if dim_i not in out_dims:
# If the dimension was not indexed, we keep it as is
combine_dim_info(dim_i, shape_i)
output = xtensor(dtype=x.type.dtype, shape=out_shape, dims=out_dims)
return Apply(self, [x, *idxs], [output])
index = Index()
import pytensor.xtensor.rewriting.basic
import pytensor.xtensor.rewriting.indexing
import pytensor.xtensor.rewriting.reduction
import pytensor.xtensor.rewriting.shape
import pytensor.xtensor.rewriting.vectorization
from itertools import zip_longest
from pytensor import as_symbolic
from pytensor.graph import Constant, node_rewriter
from pytensor.tensor import TensorType, arange, specify_shape
from pytensor.tensor.subtensor import _non_consecutive_adv_indexing
from pytensor.tensor.type_other import NoneTypeT, SliceType
from pytensor.xtensor.basic import tensor_from_xtensor, xtensor_from_tensor
from pytensor.xtensor.indexing import Index
from pytensor.xtensor.rewriting.utils import register_lower_xtensor
from pytensor.xtensor.type import XTensorType
def to_basic_idx(idx):
if isinstance(idx.type, SliceType):
if isinstance(idx, Constant):
return idx.data
elif idx.owner:
# MakeSlice Op
# We transform NoneConsts to regular None so that basic Subtensor can be used if possible
return slice(
*[
None if isinstance(i.type, NoneTypeT) else i
for i in idx.owner.inputs
]
)
else:
return idx
if (
isinstance(idx.type, XTensorType)
and idx.type.ndim == 0
and idx.type.dtype != bool
):
return idx.values
raise TypeError("Cannot convert idx to basic idx")
@register_lower_xtensor
@node_rewriter(tracks=[Index])
def lower_index(fgraph, node):
"""Lower XTensorVariable indexing to regular TensorVariable indexing.
xarray-like indexing has two modes:
1. Orthogonal indexing: Indices of different output labeled dimensions are combined to produce all combinations of indices.
2. Vectorized indexing: Indices of the same output labeled dimension are combined point-wise like in regular numpy advanced indexing.
An Index Op can combine both modes.
To achieve orthogonal indexing using numpy semantics we must use multidimensional advanced indexing.
We expand the dims of each index so they are as large as the number of output dimensions, place the indices that
belong to the same output dimension in the same axis, and those that belong to different output dimensions in different axes.
For instance to do an outer 2x2 indexing we can select x[arange(x.shape[0])[:, None], arange(x.shape[1])[None, :]],
This is a generalization of `np.ix_` that allows combining some dimensions, and not others, as well as have
indices that have more than one dimension at the start.
In addition, xarray basic index (slices), can be vectorized with other advanced indices (if they act on the same output dimension).
However, in numpy, basic indices are always orthogonal to advanced indices. To make them behave like vectorized indices
we have to convert the slices to equivalent advanced indices.
We do this by creating an `arange` tensor that matches the shape of the dimension being indexed,
and then indexing it with the original slice. This index is then handled as a regular advanced index.
Note: The IndexOp has only 2 types of indices: Slices and XTensorVariables. Regular array indices
are converted to the appropriate XTensorVariable by `Index.make_node`
"""
x, *idxs = node.inputs
[out] = node.outputs
x_tensor = tensor_from_xtensor(x)
if all(
(
isinstance(idx.type, SliceType)
or (isinstance(idx.type, XTensorType) and idx.type.ndim == 0)
)
for idx in idxs
):
# Special case having just basic indexing
x_tensor_indexed = x_tensor[tuple(to_basic_idx(idx) for idx in idxs)]
else:
# General case, we have to align the indices positionally to achieve vectorized or orthogonal indexing
# May need to convert basic indexing to advanced indexing if it acts on a dimension that is also indexed by an advanced index
x_dims = x.type.dims
x_shape = tuple(x.shape)
out_ndim = out.type.ndim
out_dims = out.type.dims
aligned_idxs = []
basic_idx_axis = []
# zip_longest adds the implicit slice(None)
for i, (idx, x_dim) in enumerate(
zip_longest(idxs, x_dims, fillvalue=as_symbolic(slice(None)))
):
if isinstance(idx.type, SliceType):
if not any(
(
isinstance(other_idx.type, XTensorType)
and x_dim in other_idx.dims
)
for j, other_idx in enumerate(idxs)
if j != i
):
# We can use basic indexing directly if no other index acts on this dimension
# This is an optimization that avoids creating an unnecessary arange tensor
# and facilitates the use of the specialized AdvancedSubtensor1 when possible
aligned_idxs.append(idx)
basic_idx_axis.append(out_dims.index(x_dim))
else:
# Otherwise we need to convert the basic index into an equivalent advanced indexing
# And align it so it interacts correctly with the other advanced indices
adv_idx_equivalent = arange(x_shape[i])[to_basic_idx(idx)]
ds_order = ["x"] * out_ndim
ds_order[out_dims.index(x_dim)] = 0
aligned_idxs.append(adv_idx_equivalent.dimshuffle(ds_order))
else:
assert isinstance(idx.type, XTensorType)
if idx.type.ndim == 0:
# Scalar index, we can use it directly
aligned_idxs.append(idx.values)
else:
# Vector index, we need to align the indexing dimensions with the base_dims
ds_order = ["x"] * out_ndim
for j, idx_dim in enumerate(idx.dims):
ds_order[out_dims.index(idx_dim)] = j
aligned_idxs.append(idx.values.dimshuffle(ds_order))
# Squeeze indexing dimensions that were not used because we kept basic indexing slices
if basic_idx_axis:
aligned_idxs = [
idx.squeeze(axis=basic_idx_axis)
if (isinstance(idx.type, TensorType) and idx.type.ndim > 0)
else idx
for idx in aligned_idxs
]
x_tensor_indexed = x_tensor[tuple(aligned_idxs)]
if basic_idx_axis and _non_consecutive_adv_indexing(aligned_idxs):
# Numpy moves advanced indexing dimensions to the front when they are not consecutive
# We need to transpose them back to the expected output order
x_tensor_indexed_basic_dims = [out_dims[axis] for axis in basic_idx_axis]
x_tensor_indexed_dims = [
dim for dim in out_dims if dim not in x_tensor_indexed_basic_dims
] + x_tensor_indexed_basic_dims
transpose_order = [x_tensor_indexed_dims.index(dim) for dim in out_dims]
x_tensor_indexed = x_tensor_indexed.transpose(transpose_order)
# Add lost shape information
x_tensor_indexed = specify_shape(x_tensor_indexed, out.type.shape)
new_out = xtensor_from_tensor(x_tensor_indexed, dims=out.type.dims)
return [new_out]
import typing
import warnings
from types import EllipsisType
from pytensor.compile import (
......@@ -24,7 +25,7 @@ except ModuleNotFoundError:
XARRAY_AVAILABLE = False
from collections.abc import Sequence
from typing import Literal, TypeVar
from typing import Any, Literal, TypeVar
import numpy as np
......@@ -421,7 +422,106 @@ class XTensorVariable(Variable[_XTensorTypeType, OptionalApplyType]):
raise NotImplementedError("sel not implemented for XTensorVariable")
def __getitem__(self, idx):
raise NotImplementedError("Indexing not yet implemnented")
if isinstance(idx, dict):
return self.isel(idx)
if not isinstance(idx, tuple):
idx = (idx,)
return px.indexing.index(self, *idx)
def isel(
self,
indexers: dict[str, Any] | None = None,
drop: bool = False, # Unused by PyTensor
missing_dims: Literal["raise", "warn", "ignore"] = "raise",
**indexers_kwargs,
):
if indexers_kwargs:
if indexers is not None:
raise ValueError(
"Cannot pass both indexers and indexers_kwargs to isel"
)
indexers = indexers_kwargs
if not indexers:
# No-op
return self
if missing_dims not in {"raise", "warn", "ignore"}:
raise ValueError(
f"Unrecognized options {missing_dims} for missing_dims argument"
)
# Sort indices and pass them to index
dims = self.type.dims
indices = [slice(None)] * self.type.ndim
for key, idx in indexers.items():
if idx is Ellipsis:
# Xarray raises a less informative error, suggesting indices must be integer
# But slices are also fine
raise TypeError("Ellipsis (...) is an invalid labeled index")
try:
indices[dims.index(key)] = idx
except IndexError:
if missing_dims == "raise":
raise ValueError(
f"Dimension {key} does not exist. Expected one of {dims}"
)
elif missing_dims == "warn":
warnings.warn(
f"Dimension {key} does not exist. Expected one of {dims}",
UserWarning,
)
return px.indexing.index(self, *indices)
def _head_tail_or_thin(
self,
indexers: dict[str, Any] | int | None,
indexers_kwargs: dict[str, Any],
*,
kind: Literal["head", "tail", "thin"],
):
if indexers_kwargs:
if indexers is not None:
raise ValueError(
"Cannot pass both indexers and indexers_kwargs to head"
)
indexers = indexers_kwargs
if indexers is None:
if kind == "thin":
raise TypeError(
"thin() indexers must be either dict-like or a single integer"
)
else:
# Default to 5 for head and tail
indexers = {dim: 5 for dim in self.type.dims}
elif not isinstance(indexers, dict):
indexers = {dim: indexers for dim in self.type.dims}
if kind == "head":
indices = {dim: slice(None, value) for dim, value in indexers.items()}
elif kind == "tail":
sizes = self.sizes
# Can't use slice(-value, None), in case value is zero
indices = {
dim: slice(sizes[dim] - value, None) for dim, value in indexers.items()
}
elif kind == "thin":
indices = {dim: slice(None, None, value) for dim, value in indexers.items()}
return self.isel(indices)
def head(self, indexers: dict[str, Any] | int | None = None, **indexers_kwargs):
return self._head_tail_or_thin(indexers, indexers_kwargs, kind="head")
def tail(self, indexers: dict[str, Any] | int | None = None, **indexers_kwargs):
return self._head_tail_or_thin(indexers, indexers_kwargs, kind="tail")
def thin(self, indexers: dict[str, Any] | int | None = None, **indexers_kwargs):
return self._head_tail_or_thin(indexers, indexers_kwargs, kind="thin")
# ndarray methods
# https://docs.xarray.dev/en/latest/api.html#id7
......
# ruff: noqa: E402
import pytest
pytest.importorskip("xarray")
import re
import numpy as np
from xarray import DataArray
from pytensor.tensor import tensor
from pytensor.xtensor import xtensor
from tests.xtensor.util import xr_arange_like, xr_assert_allclose, xr_function
@pytest.mark.parametrize(
"indices",
[
(0,),
(slice(1, None),),
(slice(None, -1),),
(slice(None, None, -1),),
(0, slice(None), -1, slice(1, None)),
(..., 0, -1),
(0, ..., -1),
(0, -1, ...),
],
)
@pytest.mark.parametrize("labeled", (False, True), ids=["unlabeled", "labeled"])
def test_basic_indexing(labeled, indices):
if ... in indices and labeled:
pytest.skip("Ellipsis not supported with labeled indexing")
dims = ("a", "b", "c", "d")
x = xtensor(dims=dims, shape=(2, 3, 5, 7))
if labeled:
shufled_dims = tuple(np.random.permutation(dims))
indices = dict(zip(shufled_dims, indices, strict=False))
out = x[indices]
fn = xr_function([x], out)
x_test_values = np.arange(np.prod(x.type.shape), dtype=x.type.dtype).reshape(
x.type.shape
)
x_test = DataArray(x_test_values, dims=x.type.dims)
res = fn(x_test)
expected_res = x_test[indices]
xr_assert_allclose(res, expected_res)
def test_single_vector_indexing_on_existing_dim():
x = xtensor(dims=("a", "b"), shape=(3, 5))
idx = tensor("idx", dtype=int, shape=(4,))
xidx = xtensor("idx", dtype=int, shape=(4,), dims=("a",))
x_test = xr_arange_like(x)
idx_test = np.array([0, 1, 0, 2], dtype=int)
xidx_test = DataArray(idx_test, dims=("a",))
# Equivalent ways of indexing a->a
y = x[idx]
fn = xr_function([x, idx], y)
res = fn(x_test, idx_test)
expected_res = x_test[idx_test]
xr_assert_allclose(res, expected_res)
y = x[(("a", idx),)]
fn = xr_function([x, idx], y)
res = fn(x_test, idx_test)
expected_res = x_test[(("a", idx_test),)]
xr_assert_allclose(res, expected_res)
y = x[((("a",), idx),)]
fn = xr_function([x, idx], y)
res = fn(x_test, idx_test)
expected_res = x_test[((("a",), idx_test),)]
xr_assert_allclose(res, expected_res)
y = x[xidx]
fn = xr_function([x, xidx], y)
res = fn(x_test, xidx_test)
expected_res = x_test[xidx_test]
xr_assert_allclose(res, expected_res)
def test_single_vector_indexing_on_new_dim():
x = xtensor(dims=("a", "b"), shape=(3, 5))
idx = tensor("idx", dtype=int, shape=(4,))
xidx = xtensor("idx", dtype=int, shape=(4,), dims=("new_a",))
x_test = xr_arange_like(x)
idx_test = np.array([0, 1, 0, 2], dtype=int)
xidx_test = DataArray(idx_test, dims=("new_a",))
# Equivalent ways of indexing a->new_a
y = x[(("new_a", idx),)]
fn = xr_function([x, idx], y)
res = fn(x_test, idx_test)
expected_res = x_test[(("new_a", idx_test),)]
xr_assert_allclose(res, expected_res)
y = x[((["new_a"], idx),)]
fn = xr_function([x, idx], y)
res = fn(x_test, idx_test)
expected_res = x_test[((["new_a"], idx_test),)]
xr_assert_allclose(res, expected_res)
y = x[xidx]
fn = xr_function([x, xidx], y)
res = fn(x_test, xidx_test)
expected_res = x_test[xidx_test]
xr_assert_allclose(res, expected_res)
def test_single_vector_indexing_interacting_with_existing_dim():
x = xtensor(dims=("a", "b"), shape=(3, 5))
idx = tensor("idx", dtype=int, shape=(4,))
xidx = xtensor("idx", dtype=int, shape=(4,), dims=("a",))
x_test = xr_arange_like(x)
idx_test = np.array([0, 1, 0, 2], dtype=int)
xidx_test = DataArray(idx_test, dims=("a",))
# Two equivalent ways of indexing a->b
# By labeling the index on a, as "b", we cause pointwise indexing between the two dimensions.
y = x[("b", idx), 1:]
fn = xr_function([x, idx], y)
res = fn(x_test, idx_test)
expected_res = x_test[("b", idx_test), 1:]
xr_assert_allclose(res, expected_res)
y = x[xidx.rename(a="b"), 1:]
fn = xr_function([x, xidx], y)
res = fn(x_test, xidx_test)
expected_res = x_test[xidx_test.rename(a="b"), 1:]
xr_assert_allclose(res, expected_res)
@pytest.mark.parametrize(
"dims_order",
[
("a", "b", "ar", "br", "o"),
("o", "br", "ar", "b", "a"),
("a", "b", "o", "ar", "br"),
("a", "o", "ar", "b", "br"),
],
)
def test_multiple_vector_indexing(dims_order):
x = xtensor(dims=dims_order, shape=(5, 7, 11, 13, 17))
idx_a = xtensor("idx_a", dtype=int, shape=(4,), dims=("a",))
idx_b = xtensor("idx_b", dtype=int, shape=(3,), dims=("b",))
idxs = [slice(None)] * 5
idxs[x.type.dims.index("a")] = idx_a
idxs[x.type.dims.index("b")] = idx_b
idxs[x.type.dims.index("ar")] = idx_a[::-1]
idxs[x.type.dims.index("br")] = idx_b[::-1]
out = x[tuple(idxs)]
fn = xr_function([x, idx_a, idx_b], out)
x_test = xr_arange_like(x)
idx_a_test = DataArray(np.array([0, 1, 0, 2], dtype=int), dims=("a",))
idx_b_test = DataArray(np.array([1, 3, 0], dtype=int), dims=("b",))
res = fn(x_test, idx_a_test, idx_b_test)
idxs_test = [slice(None)] * 5
idxs_test[x.type.dims.index("a")] = idx_a_test
idxs_test[x.type.dims.index("b")] = idx_b_test
idxs_test[x.type.dims.index("ar")] = idx_a_test[::-1]
idxs_test[x.type.dims.index("br")] = idx_b_test[::-1]
expected_res = x_test[tuple(idxs_test)]
xr_assert_allclose(res, expected_res)
def test_matrix_indexing():
x = xtensor(dims=("a", "b", "c"), shape=(3, 5, 7))
idx_ab = xtensor("idx_ab", dtype=int, shape=(4, 2), dims=("a", "b"))
idx_cd = xtensor("idx_cd", dtype=int, shape=(4, 3), dims=("c", "d"))
out = x[idx_ab, slice(1, 3), idx_cd]
fn = xr_function([x, idx_ab, idx_cd], out)
x_test = xr_arange_like(x)
idx_ab_test = DataArray(
np.array([[0, 1], [1, 2], [0, 2], [-1, -2]], dtype=int), dims=("a", "b")
)
idx_cd_test = DataArray(
np.array([[1, 2, 3], [0, 4, 5], [2, 6, -1], [3, -2, 0]], dtype=int),
dims=("c", "d"),
)
res = fn(x_test, idx_ab_test, idx_cd_test)
expected_res = x_test[idx_ab_test, slice(1, 3), idx_cd_test]
xr_assert_allclose(res, expected_res)
def test_assign_multiple_out_dims():
x = xtensor("x", shape=(5, 7), dims=("a", "b"))
idx1 = tensor("idx1", dtype=int, shape=(4, 3))
idx2 = tensor("idx2", dtype=int, shape=(3, 2))
out = x[(("out1", "out2"), idx1), (["out2", "out3"], idx2)]
fn = xr_function([x, idx1, idx2], out)
rng = np.random.default_rng()
x_test = xr_arange_like(x)
idx1_test = rng.binomial(n=4, p=0.5, size=(4, 3))
idx2_test = rng.binomial(n=4, p=0.5, size=(3, 2))
res = fn(x_test, idx1_test, idx2_test)
expected_res = x_test[(("out1", "out2"), idx1_test), (["out2", "out3"], idx2_test)]
xr_assert_allclose(res, expected_res)
def test_assign_indexer_dims_fails():
# Test cases where the implicit naming of the indexer dimensions is not allowed.
x = xtensor("x", shape=(5, 7), dims=("a", "b"))
idx1 = xtensor("idx1", dtype=int, shape=(4,), dims=("c",))
with pytest.raises(
IndexError,
match=re.escape(
"Giving a dimension name to an XTensorVariable indexer is not supported: ('d', idx1). "
"Use .rename() instead."
),
):
x[("d", idx1),]
with pytest.raises(
IndexError,
match=re.escape(
"Boolean indexer should be unlabeled or on the same dimension to the indexed array. "
"Indexer is on ('c',) but the target dimension is a."
),
):
x[idx1.astype("bool")]
class TestVectorizedIndexingNotAllowedToBroadcast:
def test_compile_time_error(self):
x = xtensor(dims=("a", "b"), shape=(3, 5))
idx_a = xtensor("idx_a", dtype=int, shape=(4,), dims=("b",))
idx_b = xtensor("idx_b", dtype=int, shape=(1,), dims=("b",))
with pytest.raises(
IndexError, match="Dimension of indexers mismatch for dim b"
):
x[idx_a, idx_b]
@pytest.mark.xfail(
reason="Check that lowered indexing is not allowed to broadcast not implemented yet"
)
def test_runtime_error(self):
"""
Test that, unlike in numpy, indices with different shapes cannot act on the same dimension,
even if the shapes could broadcast as per numpy semantics.
"""
x = xtensor(dims=("a", "b"), shape=(3, 5))
idx_a = xtensor("idx_a", dtype=int, shape=(None,), dims=("b",))
idx_b = xtensor("idx_b", dtype=int, shape=(None,), dims=("b",))
out = x[idx_a, idx_b]
fn = xr_function([x, idx_a, idx_b], out)
x_test = xr_arange_like(x)
valid_idx_a_test = DataArray(np.array([0], dtype=int), dims=("b",))
idx_b_test = DataArray(np.array([1], dtype=int), dims=("b",))
xr_assert_allclose(
fn(x_test, valid_idx_a_test, idx_b_test),
x_test[valid_idx_a_test, idx_b_test],
)
invalid_idx_a_test = DataArray(np.array([0, 1, 0, 1], dtype=int), dims=("b",))
with pytest.raises(ValueError):
fn(x_test, invalid_idx_a_test, idx_b_test)
@pytest.mark.parametrize(
"dims_order",
[
("a", "b", "c", "d"),
("d", "c", "b", "a"),
("c", "a", "b", "d"),
],
)
def test_scalar_integer_indexing(dims_order):
x = xtensor(dims=dims_order, shape=(3, 5, 7, 11))
scalar_idx = xtensor("scalar_idx", dtype=int, shape=(), dims=())
vec_idx1 = xtensor("vec_idx", dtype=int, shape=(4,), dims=("a",))
vec_idx2 = xtensor("vec_idx2", dtype=int, shape=(4,), dims=("c",))
idxs = [None] * 4
idxs[x.type.dims.index("a")] = scalar_idx
idxs[x.type.dims.index("b")] = vec_idx1
idxs[x.type.dims.index("c")] = vec_idx2
idxs[x.type.dims.index("d")] = -scalar_idx
out1 = x[tuple(idxs)]
idxs[x.type.dims.index("a")] = vec_idx1.rename(a="c")
out2 = x[tuple(idxs)]
fn = xr_function([x, scalar_idx, vec_idx1, vec_idx2], (out1, out2))
x_test = xr_arange_like(x)
scalar_idx_test = DataArray(np.array(1, dtype=int), dims=())
vec_idx_test1 = DataArray(np.array([0, 1, 0, 2], dtype=int), dims=("a",))
vec_idx_test2 = DataArray(np.array([0, 2, 2, 1], dtype=int), dims=("c",))
res1, res2 = fn(x_test, scalar_idx_test, vec_idx_test1, vec_idx_test2)
idxs = [None] * 4
idxs[x.type.dims.index("a")] = scalar_idx_test
idxs[x.type.dims.index("b")] = vec_idx_test1
idxs[x.type.dims.index("c")] = vec_idx_test2
idxs[x.type.dims.index("d")] = -scalar_idx_test
expected_res1 = x_test[tuple(idxs)]
idxs[x.type.dims.index("a")] = vec_idx_test1.rename(a="c")
expected_res2 = x_test[tuple(idxs)]
xr_assert_allclose(res1, expected_res1)
xr_assert_allclose(res2, expected_res2)
def test_unsupported_boolean_indexing():
x = xtensor(dims=("a", "b"), shape=(3, 5))
mat_idx = xtensor("idx", dtype=bool, shape=(4, 2), dims=("a", "b"))
scalar_idx = mat_idx.isel(a=0, b=1)
for idx in (mat_idx, scalar_idx, scalar_idx.values):
with pytest.raises(
NotImplementedError,
match="Only 1d boolean indexing arrays are supported",
):
x[idx]
def test_boolean_indexing():
x = xtensor("x", shape=(8, 7), dims=("a", "b"))
bool_idx = xtensor("bool_idx", dtype=bool, shape=(8,), dims=("a",))
int_idx = xtensor("int_idx", dtype=int, shape=(4, 3), dims=("a", "new_dim"))
out_vectorized = x[bool_idx, int_idx]
out_orthogonal = x[bool_idx, int_idx.rename(a="b")]
fn = xr_function([x, bool_idx, int_idx], [out_vectorized, out_orthogonal])
x_test = xr_arange_like(x)
bool_idx_test = DataArray(np.array([True, False] * 4, dtype=bool), dims=("a",))
int_idx_test = DataArray(
np.random.binomial(n=4, p=0.5, size=(4, 3)),
dims=("a", "new_dim"),
)
res1, res2 = fn(x_test, bool_idx_test, int_idx_test)
expected_res1 = x_test[bool_idx_test, int_idx_test]
expected_res2 = x_test[bool_idx_test, int_idx_test.rename(a="b")]
xr_assert_allclose(res1, expected_res1)
xr_assert_allclose(res2, expected_res2)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论