Merge CAReduce and CAReduceDtype

c0d2c635 · Brandon T. Willard · Ricardo Vieira · a5626b0a · c0d2c635 · c0d2c635
--- a/pytensor/tensor/elemwise.py
+++ b/pytensor/tensor/elemwise.py
--- a/pytensor/tensor/math.py
+++ b/pytensor/tensor/math.py
@@ -25,13 +25,7 @@ from pytensor.tensor.basic import (
    stack,
    switch,
 )
-from pytensor.tensor.elemwise import (
-    CAReduce,
-    CAReduceDtype,
-    DimShuffle,
-    Elemwise,
-    scalar_elemwise,
-)
+from pytensor.tensor.elemwise import CAReduce, DimShuffle, Elemwise, scalar_elemwise
 from pytensor.tensor.shape import shape, specify_broadcastable
 from pytensor.tensor.type import (
    DenseTensorType,
@@ -633,6 +627,10 @@ class Max(NonZeroCAReduce):
    def __init__(self, axis):
        super().__init__(aes.scalar_maximum, axis)

+    def clone(self, **kwargs):  # rebuild this op; only "axis" is read from kwargs
+        axis = kwargs.get("axis", self.axis)  # caller's axis wins, else keep ours
+        return type(self)(axis=axis)  # type(self) keeps the concrete subclass
+

 class Min(NonZeroCAReduce):
    nfunc_spec = ("min", 1, 1)
@@ -640,6 +638,10 @@ class Min(NonZeroCAReduce):
    def __init__(self, axis):
        super().__init__(aes.scalar_minimum, axis)

+    def clone(self, **kwargs):  # rebuild this op; only "axis" is read from kwargs
+        axis = kwargs.get("axis", self.axis)  # caller's axis wins, else keep ours
+        return type(self)(axis=axis)  # type(self) keeps the concrete subclass
+

 def max(x, axis=None, keepdims=False):
    """
@@ -1530,6 +1532,10 @@ class Mean(CAReduce):
  """
        )

+    def clone(self, **kwargs):  # new op of the same type; other kwargs are ignored
+        axis = kwargs.get("axis", self.axis)  # fall back to the current axis
+        return type(self)(axis=axis)  # constructed via the runtime class
+

 # TODO: implement the grad. When done and tested, you can make this the default
 # version.
@@ -2350,7 +2356,6 @@ class All(CAReduce):

    """

-    __props__ = ("axis",)
    nfunc_spec = ("all", 1, 1)

    def __init__(self, axis=None):
@@ -2376,6 +2381,10 @@ class All(CAReduce):
        (x,) = inp
        return [x.zeros_like(config.floatX)]

+    def clone(self, **kwargs):  # new op of the same type; other kwargs are ignored
+        axis = kwargs.get("axis", self.axis)  # explicit axis=None is passed through
+        return type(self)(axis=axis)  # constructed via the runtime class
+

 class Any(CAReduce):
    """Applies `bitwise or` to all the values of a tensor along the
@@ -2383,7 +2392,6 @@ class Any(CAReduce):

    """

-    __props__ = ("axis",)
    nfunc_spec = ("any", 1, 1)

    def __init__(self, axis=None):
@@ -2409,48 +2417,31 @@ class Any(CAReduce):
        (x,) = inp
        return [x.zeros_like(config.floatX)]

+    def clone(self, **kwargs):  # new op of the same type; other kwargs are ignored
+        axis = kwargs.get("axis", self.axis)  # explicit axis=None is passed through
+        return type(self)(axis=axis)  # constructed via the runtime class

-class Sum(CAReduceDtype):
+
+class Sum(CAReduce):
    """
    Sums all the values of a tensor along the specified axis(es).

-    Equivalent to `CAReduceDtype(scalar.add, axis=axis, dtype=dtype)`,
+    Equivalent to `CAReduce(scalar.add, axis=axis, dtype=dtype)`,
    with the difference that this defines the gradient of sum wrt its
    tensor input.

-    Parameters
-    ----------
-    axis
-        Axis(es) along which the tensor should be summed
-        (use None to sum over all axes, and a list or tuple to sum along more
-        than one axis).
-
-    dtype
-        The dtype of the internal accumulator and returned
-        tensor. If None, then we use the default dtype which is the same as the
-        input tensor's dtype except when:
-        - the input dtype is a signed integer of precision < 64 bit, in
-        which case we use int64
-        - the input dtype is an unsigned integer of precision < 64 bit, in
-        which case we use uint64
-        This value does not depend on the value of "acc_dtype".
-
-    acc_dtype
-        The dtype of the internal accumulator.
-        If None (default), we use the dtype in the list below,
-        or the input dtype if its precision is higher:
-        - for int dtypes, we use at least int64;
-        - for uint dtypes, we use at least uint64;
-        - for float dtypes, we use at least float64;
-        - for complex dtypes, we use at least complex128.
-
    """

-    __props__ = ("axis", "dtype", "acc_dtype")
    nfunc_spec = ("sum", 1, 1)

    def __init__(self, axis=None, dtype=None, acc_dtype=None):
-        super().__init__(aes.add, axis=axis, dtype=dtype, acc_dtype=acc_dtype)
+        super().__init__(
+            aes.add,
+            axis=axis,
+            dtype=dtype,
+            acc_dtype=acc_dtype,
+            upcast_discrete_output=True,
+        )

    def __str__(self):
        name = self.__class__.__name__
@@ -2492,6 +2483,12 @@ class Sum(CAReduceDtype):
            return [None]
        return self(*eval_points, return_list=True)

+    def clone(self, **kwargs):  # rebuild this op with optional per-field overrides
+        axis = kwargs.get("axis", self.axis)  # each field: kwargs value, else current
+        dtype = kwargs.get("dtype", self.dtype)
+        acc_dtype = kwargs.get("acc_dtype", self.acc_dtype)
+        return type(self)(axis=axis, dtype=dtype, acc_dtype=acc_dtype)
+

 def sum(input, axis=None, dtype=None, keepdims=False, acc_dtype=None):
    """
@@ -2523,7 +2520,7 @@ def sum(input, axis=None, dtype=None, keepdims=False, acc_dtype=None):
 pprint.assign(Sum, printing.FunctionPrinter(["sum"], ["axis"]))


-class Prod(CAReduceDtype):
+class Prod(CAReduce):
    """
    Multiplies all the values of a tensor along the specified axis(es).

@@ -2533,19 +2530,20 @@ class Prod(CAReduceDtype):

    """

-    __props__ = ("axis", "dtype", "acc_dtype")
+    __props__ = ("scalar_op", "axis", "dtype", "acc_dtype", "no_zeros_in_input")
+
    nfunc_spec = ("prod", 1, 1)

    def __init__(self, axis=None, dtype=None, acc_dtype=None, no_zeros_in_input=False):
-        super().__init__(aes.mul, axis=axis, dtype=dtype, acc_dtype=acc_dtype)
+        super().__init__(
+            aes.mul,
+            axis=axis,
+            dtype=dtype,
+            acc_dtype=acc_dtype,
+            upcast_discrete_output=True,
+        )
        self.no_zeros_in_input = no_zeros_in_input

-    def __setstate__(self, dct):
-        super().__setstate__(dct)
-        # Add default value to be able to reload old pickled objects.
-        if "no_zeros_in_input" not in dct:
-            self.no_zeros_in_input = False
-
    def L_op(self, inp, out, grads):
        """
        The grad of this Op could be very easy, if it is was not for the case
@@ -2668,6 +2666,18 @@ class Prod(CAReduceDtype):
    def c_code_cache_version(self):
        return (1,)

+    def clone(self, **kwargs):  # rebuild this op with optional per-field overrides
+        axis = kwargs.get("axis", self.axis)  # each field: kwargs value, else current
+        dtype = kwargs.get("dtype", self.dtype)
+        acc_dtype = kwargs.get("acc_dtype", self.acc_dtype)
+        no_zeros_in_input = kwargs.get("no_zeros_in_input", self.no_zeros_in_input)
+        return type(self)(  # type(self) keeps the concrete subclass
+            axis=axis,
+            dtype=dtype,
+            acc_dtype=acc_dtype,
+            no_zeros_in_input=no_zeros_in_input,  # extra flag is carried over too
+        )
+

 def prod(
    input,
@@ -2736,12 +2746,15 @@ class MulWithoutZeros(BinaryScalarOp):
 mul_without_zeros = MulWithoutZeros(aes.upcast_out, name="mul_without_zeros")


-class ProdWithoutZeros(CAReduceDtype):
-
-    __props__ = ("axis", "dtype", "acc_dtype")
-
+class ProdWithoutZeros(CAReduce):
    def __init__(self, axis=None, dtype=None, acc_dtype=None):
-        super().__init__(mul_without_zeros, axis=axis, dtype=dtype, acc_dtype=acc_dtype)
+        super().__init__(
+            mul_without_zeros,
+            axis=axis,
+            dtype=dtype,
+            acc_dtype=acc_dtype,
+            upcast_discrete_output=True,
+        )

    def grad(self, inp, grads):
        from pytensor.gradient import grad_not_implemented
@@ -2757,6 +2770,12 @@ class ProdWithoutZeros(CAReduceDtype):
        )
        return [a_grad]

+    def clone(self, **kwargs):  # rebuild this op with optional per-field overrides
+        axis = kwargs.get("axis", self.axis)  # each field: kwargs value, else current
+        dtype = kwargs.get("dtype", self.dtype)
+        acc_dtype = kwargs.get("acc_dtype", self.acc_dtype)
+        return type(self)(axis=axis, dtype=dtype, acc_dtype=acc_dtype)
+

 def any(x, axis=None, keepdims=False):
    out = Any(axis)(x)

--- a/tests/tensor/test_elemwise.py
+++ b/tests/tensor/test_elemwise.py
@@ -17,7 +17,7 @@ from pytensor.link.basic import PerformLinker
 from pytensor.link.c.basic import CLinker, OpWiseCLinker
 from pytensor.tensor import as_tensor_variable
 from pytensor.tensor.basic import second
-from pytensor.tensor.elemwise import CAReduce, CAReduceDtype, DimShuffle, Elemwise
+from pytensor.tensor.elemwise import CAReduce, DimShuffle, Elemwise
 from pytensor.tensor.exceptions import ShapeError
 from pytensor.tensor.math import all as at_all
 from pytensor.tensor.math import any as at_any
@@ -537,24 +537,16 @@ class TestCAReduce(unittest_tools.InferShapeTester):
                for axis in reversed(sorted(tosum)):
                    zv = np.bitwise_xor.reduce(zv, axis)
            else:
-                raise Exception(
+                raise NotImplementedError(
                    f"Test for CAReduce with scalar_op {scalar_op} not implemented"
                )

            if test_nan:
-                try:
-                    assert self.type.values_eq(f(xv), zv), (f(xv), zv)
-                except NotImplementedError:
-                    # GpuCAReduce don't implement all cases when size is 0
-                    assert xv.size == 0
+                assert self.type.values_eq(f(xv), zv), (f(xv), zv)
            else:
-                try:
-                    f_xv = f(xv)
-                    assert f_xv.shape == zv.shape, (f_xv, zv)
-                    utt.assert_allclose(zv, f_xv)
-                except NotImplementedError:
-                    # GpuCAReduce don't implement all cases when size is 0
-                    assert xv.size == 0
+                f_xv = f(xv)
+                assert f_xv.shape == zv.shape, (f_xv, zv)
+                utt.assert_allclose(zv, f_xv)

            x = self.type(
                dtype, shape=tuple(entry if entry == 1 else None for entry in xsh)
@@ -570,11 +562,7 @@ class TestCAReduce(unittest_tools.InferShapeTester):
                scalar_op in [aes.scalar_maximum, aes.scalar_minimum]
                and (xsh == () or np.prod(xsh) == 0)
            ):
-                try:
-                    assert all(f(xv) == zv.shape)
-                except NotImplementedError:
-                    # GpuCAReduce don't implement all cases when size is 0
-                    assert xv.size == 0
+                assert all(f(xv) == zv.shape)

    def test_perform_noopt(self):
        self.with_mode(Mode(linker="py", optimizer=None), aes.add, dtype="floatX")
@@ -691,12 +679,12 @@ class TestCAReduce(unittest_tools.InferShapeTester):
        op = CAReduce(aes.add, axis=None)
        assert str(op) == "CAReduce{add}"
        op = CAReduce(aes.add, axis=(1,))
-        assert str(op) == "CAReduce{add}{1}"
+        assert str(op) == "CAReduce{add}{axis=[1]}"

-        op = CAReduceDtype(aes.add, axis=None, acc_dtype="float64")
-        assert str(op) == "CAReduceDtype{add}{acc_dtype=float64}"
-        op = CAReduceDtype(aes.add, axis=(1,), acc_dtype="float64")
-        assert str(op) == "CAReduceDtype{add}{axis=[1], acc_dtype=float64}"
+        op = CAReduce(aes.add, axis=None, acc_dtype="float64")
+        assert str(op) == "CAReduce{add}{acc_dtype=float64}"
+        op = CAReduce(aes.add, axis=(1,), acc_dtype="float64")
+        assert str(op) == "CAReduce{add}{axis=[1], acc_dtype=float64}"

    def test_repeated_axis(self):
        x = vector("x")