Handle invalid BroadcastTo shape in C backend

65826e7e · Ricardo Vieira · Luciano Paz · 24b67a86 · 65826e7e · 65826e7e
--- a/pytensor/tensor/extra_ops.py
+++ b/pytensor/tensor/extra_ops.py
@@ -1643,6 +1643,11 @@ class BroadcastTo(COp):
        shape, static_shape = at.infer_static_shape(shape)
+        if len(shape) < a.ndim:
+            raise ValueError(
+                f"Broadcast target shape has {len(shape)} dims, which is shorter than input with {a.ndim} dims"
+            )
        out = TensorType(dtype=a.type.dtype, shape=static_shape)()
        # Attempt to prevent in-place operations on this view-based output
@@ -1686,9 +1691,12 @@ class BroadcastTo(COp):
        return [node.inputs[1:]]
    def c_code(self, node, name, inputs, outputs, sub):
+        inp_dims = node.inputs[0].ndim
+        out_dims = node.outputs[0].ndim
+        new_dims = out_dims - inp_dims
        (x, *shape) = inputs
        (out,) = outputs
-        ndims = len(shape)
        fail = sub["fail"]
        # TODO: Could just use `PyArray_Return`, no?
@@ -1701,20 +1709,34 @@ class BroadcastTo(COp):
        src = (
            """
-            npy_intp itershape[%(ndims)s] = {%(dims_array)s};
+            npy_intp itershape[%(out_dims)s] = {%(dims_array)s};
+            NpyIter *iter;
            PyArrayObject *ops[1] = {%(x)s};
            npy_uint32 flags = NPY_ITER_MULTI_INDEX | NPY_ITER_REFS_OK | NPY_ITER_ZEROSIZE_OK;
            npy_uint32 op_flags[1] = {NPY_ITER_READONLY};
            PyArray_Descr *op_dtypes[1] = {NULL};
-            int oa_ndim = %(ndims)s;
+            int oa_ndim = %(out_dims)s;
            int* op_axes[1] = {NULL};
            npy_intp buffersize = 0;
-            NpyIter *iter = NpyIter_AdvancedNew(
+            for(int i = 0; i < %(inp_dims)s; i++)
-                1, ops, flags, NPY_CORDER, NPY_NO_CASTING, op_flags, op_dtypes, oa_ndim, op_axes, itershape, buffersize
+            {   /* input axis i is compared against target axis i + new_dims; report the target axis by its own index */
+                if ((PyArray_DIMS(%(x)s)[i] != 1) && (PyArray_DIMS(%(x)s)[i] != itershape[i + %(new_dims)s]))
+                {
+                    PyErr_Format(PyExc_ValueError,
+                                 "Shape mismatch in broadcast_to: target shape[%%i] = %%lld is incompatible with input shape = %%lld.",
+                                 i + %(new_dims)s,
+                                 (long long int) itershape[i + %(new_dims)s],
+                                 (long long int) PyArray_DIMS(%(x)s)[i]
                    );
+                    %(fail)s
+                }
+            }
+            iter = NpyIter_AdvancedNew(
+                1, ops, flags, NPY_CORDER, NPY_NO_CASTING, op_flags, op_dtypes, oa_ndim, op_axes, itershape, buffersize
+            );
            %(out)s = NpyIter_GetIterView(iter, 0);
            if(%(out)s == NULL){
@@ -1733,7 +1755,7 @@ class BroadcastTo(COp):
        return src
    def c_code_cache_version(self):
-        return (1,)
+        return (2,)
 broadcast_to_ = BroadcastTo()

--- a/tests/tensor/test_extra_ops.py
+++ b/tests/tensor/test_extra_ops.py
@@ -1253,41 +1253,52 @@ class TestBroadcastTo(utt.InferShapeTester):
    @pytest.mark.parametrize("linker", ["cvm", "py"])
    def test_perform(self, linker):
-        a = pytensor.shared(5)
+        a = pytensor.shared(np.full((3, 1, 1), 5))
+        s_0 = iscalar("s_0")
        s_1 = iscalar("s_1")
-        shape = (s_1, 1)
+        shape = (s_0, s_1, 1)
        bcast_res = broadcast_to(a, shape)
-        assert bcast_res.broadcastable == (False, True)
+        assert bcast_res.broadcastable == (False, False, True)
        bcast_fn = pytensor.function(
-            [s_1], bcast_res, mode=Mode(optimizer=None, linker=linker)
+            [s_0, s_1], bcast_res, mode=Mode(optimizer=None, linker=linker)
        )
        bcast_fn.vm.allow_gc = False
-        bcast_at = bcast_fn(4)
+        bcast_at = bcast_fn(3, 4)
-        bcast_np = np.broadcast_to(5, (4, 1))
+        bcast_np = np.broadcast_to(5, (3, 4, 1))
        assert np.array_equal(bcast_at, bcast_np)
+        with pytest.raises(ValueError):
+            bcast_fn(5, 4)
+        if linker != "py":
            bcast_var = bcast_fn.maker.fgraph.outputs[0].owner.inputs[0]
            bcast_in = bcast_fn.vm.storage_map[a]
            bcast_out = bcast_fn.vm.storage_map[bcast_var]
-        if linker != "py":
            assert np.shares_memory(bcast_out[0], bcast_in[0])
+    def test_make_node_error_handling(self):
+        with pytest.raises(
+            ValueError,
+            match="Broadcast target shape has 1 dims, which is shorter than input with 2 dims",
+        ):
+            broadcast_to(at.zeros((3, 4)), (5,))
    @pytest.mark.skipif(
        not config.cxx, reason="G++ not available, so we need to skip this test."
    )
-    def test_memory_leak(self):
+    @pytest.mark.parametrize("valid", (True, False))
+    def test_memory_leak(self, valid):
        import gc
        import tracemalloc
        from pytensor.link.c.cvm import CVM
        n = 100_000
-        x = pytensor.shared(np.ones(n, dtype=np.float64))
+        x = pytensor.shared(np.ones((1, n), dtype=np.float64))
        y = broadcast_to(x, (5, n))
        f = pytensor.function([], y, mode=Mode(optimizer=None, linker="cvm"))
@@ -1303,8 +1314,17 @@ class TestBroadcastTo(utt.InferShapeTester):
        blocks_last = None
        block_diffs = []
        for i in range(1, 50):
-            x.set_value(np.ones(n))
+            if valid:
+                x.set_value(np.ones((1, n)))
                _ = f()
+            else:
+                x.set_value(np.ones((2, n)))
+                try:
+                    _ = f()
+                except ValueError:
+                    pass
+                else:
+                    raise RuntimeError("Should have failed")
            _ = gc.collect()
            blocks_i, _ = tracemalloc.get_traced_memory()
            if blocks_last is not None:
@@ -1313,7 +1333,7 @@ class TestBroadcastTo(utt.InferShapeTester):
            blocks_last = blocks_i
        tracemalloc.stop()
-        assert np.allclose(np.mean(block_diffs), 0)
+        assert np.all(np.array(block_diffs) <= (0 + 1e-8))
    @pytest.mark.parametrize(
        "fn,input_dims",