提交 e3fb4985 authored 作者: lucianopaz's avatar lucianopaz 提交者: Ricardo Vieira

Fix tensordot implementation

上级 f799219e
import builtins import builtins
import warnings import warnings
from typing import TYPE_CHECKING, Optional from collections.abc import Sequence
from typing import TYPE_CHECKING, Optional, Union
import numpy as np import numpy as np
from numpy.core.numeric import normalize_axis_tuple
from pytensor import config, printing from pytensor import config, printing
from pytensor import scalar as ps from pytensor import scalar as ps
...@@ -15,7 +17,9 @@ from pytensor.link.c.params_type import ParamsType ...@@ -15,7 +17,9 @@ from pytensor.link.c.params_type import ParamsType
from pytensor.link.c.type import Generic from pytensor.link.c.type import Generic
from pytensor.misc.safe_asarray import _asarray from pytensor.misc.safe_asarray import _asarray
from pytensor.printing import pprint from pytensor.printing import pprint
from pytensor.raise_op import Assert
from pytensor.scalar.basic import BinaryScalarOp from pytensor.scalar.basic import BinaryScalarOp
from pytensor.tensor import TensorLike
from pytensor.tensor.basic import ( from pytensor.tensor.basic import (
alloc, alloc,
arange, arange,
...@@ -47,7 +51,11 @@ from pytensor.tensor.type import ( ...@@ -47,7 +51,11 @@ from pytensor.tensor.type import (
) )
from pytensor.tensor.type_other import NoneConst from pytensor.tensor.type_other import NoneConst
from pytensor.tensor.utils import as_list from pytensor.tensor.utils import as_list
from pytensor.tensor.variable import TensorConstant, _tensor_py_operators from pytensor.tensor.variable import (
TensorConstant,
TensorVariable,
_tensor_py_operators,
)
if TYPE_CHECKING: if TYPE_CHECKING:
...@@ -2266,57 +2274,47 @@ def _tensordot_as_dot(a, b, axes, dot, batched): ...@@ -2266,57 +2274,47 @@ def _tensordot_as_dot(a, b, axes, dot, batched):
) )
def tensordot(
    a: TensorLike, b: TensorLike, axes: Union[int, Sequence[Sequence[int]]] = 2
) -> TensorVariable:
    """
    Compute tensor dot product along specified axes.

    Implementation is mostly taken from numpy version 1.26.0

    Given two tensors, `a` and `b`, and a sequence object containing
    two sequence objects, ``(a_axes, b_axes)``, sum the products of
    `a`'s and `b`'s elements (components) over the axes specified by
    ``a_axes`` and ``b_axes``. The third argument can be a single non-negative
    integer_like scalar, ``N``; if it is such, then the last ``N`` dimensions
    of `a` and the first ``N`` dimensions of `b` are summed over.

    Parameters
    ----------
    a, b : tensor_like
        Tensors to "dot".

    axes : int or (2,) array_like
        * integer_like
          If an int N, sum over the last N axes of `a` and the first N axes
          of `b` in order. The sizes of the corresponding axes must match.
        * (2,) array_like
          Or, a list of axes to be summed over, first sequence applying to `a`,
          second to `b`. Both elements array_like must be of the same length.
          Each element may also be a single integer, which is treated as a
          one-element sequence.

    Returns
    -------
    output : TensorVariable
        The tensor dot product of the input.
        Its shape will be equal to the concatenation of `a` and `b` shapes
        (ignoring the dimensions that were summed over given in ``a_axes``
        and ``b_axes``)

    Raises
    ------
    ValueError
        If ``a_axes`` and ``b_axes`` do not have the same number of entries,
        or if a pair of summed axes has statically known lengths that differ.
        Errors raised by ``normalize_axis_tuple`` while validating the axes
        are propagated unchanged.

    Notes
    -----
    A summed pair in which at least one length is statically unknown
    (``None`` in the type shape) is accepted when the graph is built, because
    the unknown length may turn out to match at runtime. This includes the
    case where the other length is statically 1.

    Examples
    --------
    It may be helpful to consider an example to see what tensordot does.
    PyTensor's implementation is identical to NumPy's. Here ``a`` has shape (2, 3, 4)
    and ``b`` has shape (5, 6, 4, 3). The axes to sum over are [[1, 2], [3, 2]] --
    note that a.shape[1] == b.shape[3] and a.shape[2] == b.shape[2]; these axes
    are compatible. The resulting tensor will have shape (2, 5, 6) -- the
    dimensions that are not being summed.

    This specific implementation avoids a loop by transposing a and b such that
    the summed axes of ``a`` are last and the summed axes of ``b`` are first. The
    resulting arrays are reshaped to 2 dimensions and a matrix dot product is taken.
    The result is reshaped back to the required output dimensions.

    In an extreme case, no axes may be specified (``axes=0``). The resulting
    tensor will have shape equal to the concatenation of the shapes of a and b.

    See the documentation of numpy.tensordot for more examples.

    """
    try:
        iter(axes)
    except TypeError:
        # `axes` is a plain integer N: contract the last N axes of `a`
        # with the first N axes of `b`.
        axes_a = list(range(-axes, 0))
        axes_b = list(range(0, axes))
    else:
        axes_a, axes_b = axes

    # Each side may be a sequence of axes or a single integer axis.
    try:
        na = len(axes_a)
        axes_a = list(axes_a)
    except TypeError:
        axes_a = [axes_a]
        na = 1
    try:
        nb = len(axes_b)
        axes_b = list(axes_b)
    except TypeError:
        axes_b = [axes_b]
        nb = 1

    a = as_tensor_variable(a)
    b = as_tensor_variable(b)
    # The symbolic shapes are captured before `a` is possibly wrapped in
    # Assert nodes below; they are used to build the reshape targets.
    runtime_shape_a = a.shape
    static_shape_a = a.type.shape
    ndim_a = a.ndim
    runtime_shape_b = b.shape
    static_shape_b = b.type.shape
    ndim_b = b.ndim

    if na != nb:
        raise ValueError(
            "The number of axes supplied for tensordot must be equal for each tensor. "
            f"Got {na} and {nb} respectively."
        )
    axes_a = list(normalize_axis_tuple(axes_a, ndim_a))
    axes_b = list(normalize_axis_tuple(axes_b, ndim_b))

    seen_unknown_pair = False
    for ax_a, ax_b in zip(axes_a, axes_b):
        dim_a = static_shape_a[ax_a]
        dim_b = static_shape_b[ax_b]
        if dim_a is not None and dim_b is not None:
            # Both lengths are known when the graph is built: fail early.
            if dim_a != dim_b:
                raise ValueError(
                    "Input arrays have inconsistent broadcastable pattern or type shape along the axes "
                    "that are to be reduced with tensordot."
                )
        else:
            # At least one length is only known at runtime. The first such
            # pair gets no explicit Assert; every later one does.
            # NOTE(review): presumably the first pair is left to the shape
            # check of the final matrix dot, which compares the product of
            # all summed lengths -- confirm this also holds for zero-length
            # axes.
            if seen_unknown_pair:
                a = Assert(
                    "Input array shape along reduced axes of tensordot are not equal"
                )(a, eq(a.shape[ax_a], b.shape[ax_b]))
            seen_unknown_pair = True

    # Move the axes to sum over to the end of "a"
    # and to the front of "b"
    kept_a = [k for k in range(ndim_a) if k not in axes_a]
    summed_len_a = 1
    for axis in axes_a:
        summed_len_a *= runtime_shape_a[axis]
    olda = [runtime_shape_a[axis] for axis in kept_a]

    kept_b = [k for k in range(ndim_b) if k not in axes_b]
    summed_len_b = 1
    for axis in axes_b:
        summed_len_b *= runtime_shape_b[axis]
    oldb = [runtime_shape_b[axis] for axis in kept_b]

    # Collapse both operands to matrices so a single dot does the contraction.
    at = a.transpose(kept_a + axes_a).reshape((-1, summed_len_a))
    bt = b.transpose(axes_b + kept_b).reshape((summed_len_b, -1))
    res = _dot(at, bt)
    return res.reshape(olda + oldb)
def outer(x, y): def outer(x, y):
......
...@@ -18,18 +18,20 @@ from pytensor.compile.mode import get_default_mode ...@@ -18,18 +18,20 @@ from pytensor.compile.mode import get_default_mode
from pytensor.compile.sharedvalue import shared from pytensor.compile.sharedvalue import shared
from pytensor.configdefaults import config from pytensor.configdefaults import config
from pytensor.gradient import NullTypeGradError, grad, numeric_grad from pytensor.gradient import NullTypeGradError, grad, numeric_grad
from pytensor.graph.basic import Variable, applys_between from pytensor.graph.basic import Variable, ancestors, applys_between
from pytensor.graph.fg import FunctionGraph from pytensor.graph.fg import FunctionGraph
from pytensor.graph.replace import vectorize_node from pytensor.graph.replace import vectorize_node
from pytensor.link.c.basic import DualLinker from pytensor.link.c.basic import DualLinker
from pytensor.misc.safe_asarray import _asarray from pytensor.misc.safe_asarray import _asarray
from pytensor.printing import pprint from pytensor.printing import pprint
from pytensor.raise_op import Assert
from pytensor.tensor import blas, blas_c from pytensor.tensor import blas, blas_c
from pytensor.tensor.basic import ( from pytensor.tensor.basic import (
as_tensor_variable, as_tensor_variable,
constant, constant,
eye, eye,
get_underlying_scalar_constant_value, get_underlying_scalar_constant_value,
ones,
switch, switch,
) )
from pytensor.tensor.blas import Dot22 from pytensor.tensor.blas import Dot22
...@@ -2208,6 +2210,96 @@ class TestTensordot: ...@@ -2208,6 +2210,96 @@ class TestTensordot:
zv = f(xv, yv) zv = f(xv, yv)
assert np.allclose(np.tensordot(xv, yv, axes=axes), zv) assert np.allclose(np.tensordot(xv, yv, axes=axes), zv)
def test_type_shape(self):
    """Check static output shape, broadcast pattern and absence of Assert nodes.

    The summed axis has static length 2 on every input, so no runtime
    Assert should be inserted. ``sy`` has an unknown leading dimension,
    which must show up as ``None`` in the output type shape.
    """

    def count_asserts(out):
        # Number of distinct ancestor variables produced by an Assert op.
        return len(
            {
                node
                for node in ancestors([out])
                if node.owner and isinstance(node.owner.op, Assert)
            }
        )

    x = ones(shape=(7, 3, 2))
    y = ones(shape=(10, 2))
    xv = x.eval()
    yv = y.eval()
    sy = tensor("sy", shape=(None, 2))

    axes = [[-1], [-1]]
    z = tensordot(x, y, axes=axes)
    sz = tensordot(x, sy, axes=axes)

    # Fully static inputs.
    assert count_asserts(z) == 0
    assert z.type.shape == (7, 3, 10)
    assert z.broadcastable == (False, False, False)
    assert np.allclose(np.tensordot(xv, yv, axes=axes), z.eval())

    # Input with an unknown (non-summed) dimension.
    assert count_asserts(sz) == 0
    assert sz.type.shape == (7, 3, None)
    # Check `sz` here; the original re-checked `z` by mistake.
    assert sz.broadcastable == (False, False, False)
    assert np.allclose(np.tensordot(xv, yv, axes=axes), sz.eval({sy: yv}))

    # Statically incompatible summed axes (4 vs 7) fail at graph build time.
    with pytest.raises(
        ValueError,
        match="Input arrays have inconsistent broadcastable pattern or type shape",
    ):
        tensordot(ones(shape=(7, 4)), ones(shape=(7, 4)), axes=1)
@pytest.mark.parametrize(
    ["axes", "has_assert", "values", "expected_fail"],
    [
        ([[1], [2]], False, (np.ones((7, 3, 2)), np.ones((7, 2, 3))), False),
        ([[0, 2], [0, 1]], True, (np.ones((7, 3, 2)), np.ones((7, 2, 3))), False),
        ([[0], [0]], False, (np.ones((7, 3, 1)), np.ones((100, 1, 3))), True),
        ([[1, 2], [1, 2]], True, (np.ones((7, 3, 2)), np.ones((7, 2, 3))), True),
    ],
)
def test_shape_assert(self, axes, has_assert, values, expected_fail):
    """Check when tensordot inserts a runtime Assert and how mismatches surface.

    ``has_assert`` states whether an Assert op is expected among the
    ancestors of the result. ``expected_fail`` states whether evaluating
    with ``values`` should raise: an AssertionError when an Assert is
    present, a ValueError otherwise.
    """
    x = tensor(shape=(7, 3, None))
    y = tensor(shape=(None, None, 3))
    xv, yv = values
    xv = xv.astype(x.dtype)
    # Cast each value to the dtype of its own variable.
    yv = yv.astype(y.dtype)

    z = tensordot(x, y, axes=axes)

    found_asserts = {
        node
        for node in ancestors([z])
        if node.owner and isinstance(node.owner.op, Assert)
    }
    assert bool(found_asserts) == has_assert

    if not expected_fail:
        assert np.allclose(np.tensordot(xv, yv, axes=axes), z.eval({x: xv, y: yv}))
        return

    if has_assert:
        with pytest.raises(
            AssertionError,
            match="Input array shape along reduced axes of tensordot are not equal",
        ):
            z.eval({x: xv, y: yv})
    else:
        with pytest.raises(ValueError):
            z.eval({x: xv, y: yv})
def test_smallest(): def test_smallest():
x = dvector() x = dvector()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论