Faster implementation of numba convolve1d

2ada4b66 · Ricardo Vieira · Ricardo Vieira · 9530ffcc · 2ada4b66 · 2ada4b66
--- a/pytensor/link/numba/dispatch/signal/conv.py
+++ b/pytensor/link/numba/dispatch/signal/conv.py
 import numpy as np
+from numba.np.arraymath import _get_inner_prod
 from pytensor.link.numba.dispatch import numba_funcify
 from pytensor.link.numba.dispatch.basic import numba_njit
@@ -7,10 +8,63 @@ from pytensor.tensor.signal.conv import Convolve1d
 @numba_funcify.register(Convolve1d)
 def numba_funcify_Convolve1d(op, node, **kwargs):
+    # This specialized version is faster than the overloaded numba np.convolve
    mode = op.mode
+    a_dtype, b_dtype = node.inputs[0].type.dtype, node.inputs[1].type.dtype
+    out_dtype = node.outputs[0].type.dtype
+    innerprod = _get_inner_prod(a_dtype, b_dtype)
-    @numba_njit
+    if mode == "valid":
-    def conv1d(data, kernel):
-        return np.convolve(data, kernel, mode=mode)
-    return conv1d
+        def valid_convolve1d(x, y):
+            nx = len(x)
+            ny = len(y)
+            if nx < ny:
+                x, y = y, x
+                nx, ny = ny, nx
+            y_flipped = y[::-1]
+            length = nx - ny + 1
+            ret = np.empty(length, out_dtype)
+            for i in range(length):
+                ret[i] = innerprod(x[i : i + ny], y_flipped)
+            return ret
+        return numba_njit(valid_convolve1d)
+    elif mode == "full":
+        def full_convolve1d(x, y):
+            nx = len(x)
+            ny = len(y)
+            if nx < ny:
+                x, y = y, x
+                nx, ny = ny, nx
+            y_flipped = y[::-1]
+            length = nx + ny - 1
+            ret = np.empty(length, out_dtype)
+            idx = 0
+            for i in range(ny - 1):
+                k = i + 1
+                ret[idx] = innerprod(x[:k], y_flipped[-k:])
+                idx = idx + 1
+            for i in range(nx - ny + 1):
+                ret[idx] = innerprod(x[i : i + ny], y_flipped)
+                idx = idx + 1
+            for i in range(ny - 1):
+                k = ny - i - 1
+                ret[idx] = innerprod(x[-k:], y_flipped[:k])
+                idx = idx + 1
+            return ret
+        return numba_njit(full_convolve1d)
+    else:
+        raise ValueError(f"Unsupported mode: {mode}")
--- a/tests/link/numba/signal/test_conv.py
+++ b/tests/link/numba/signal/test_conv.py
+from functools import partial
 import numpy as np
 import pytest
-from pytensor.tensor import dmatrix
+from pytensor import function
+from pytensor.tensor import dmatrix, tensor
 from pytensor.tensor.signal import convolve1d
 from tests.link.numba.test_basic import compare_numba_and_py
@@ -10,13 +13,47 @@ pytestmark = pytest.mark.filterwarnings("error")
 @pytest.mark.parametrize("mode", ["full", "valid", "same"])
-def test_convolve1d(mode):
+@pytest.mark.parametrize("x_smaller", (False, True))
+def test_convolve1d(x_smaller, mode):
    x = dmatrix("x")
    y = dmatrix("y")
-    out = convolve1d(x[None], y[:, None], mode=mode)
+    if x_smaller:
+        out = convolve1d(x[None], y[:, None], mode=mode)
+    else:
+        out = convolve1d(y[:, None], x[None], mode=mode)
    rng = np.random.default_rng()
    test_x = rng.normal(size=(3, 5))
    test_y = rng.normal(size=(7, 11))
    # Blockwise dispatch for numba can't be run on object mode
    compare_numba_and_py([x, y], out, [test_x, test_y], eval_obj_mode=False)
+@pytest.mark.parametrize("mode", ("full", "valid"), ids=lambda x: f"mode={x}")
+@pytest.mark.parametrize("batch", (False, True), ids=lambda x: f"batch={x}")
+def test_convolve1d_benchmark(batch, mode, benchmark):
+    x = tensor(
+        shape=(
+            7,
+            183,
+        )
+        if batch
+        else (183,)
+    )
+    y = tensor(shape=(7, 6) if batch else (6,))
+    out = convolve1d(x, y, mode=mode)
+    fn = function([x, y], out, mode="NUMBA", trust_input=True)
+    rng = np.random.default_rng()
+    x_test = rng.normal(size=(x.type.shape)).astype(x.type.dtype)
+    y_test = rng.normal(size=(y.type.shape)).astype(y.type.dtype)
+    np_convolve1d = np.vectorize(
+        partial(np.convolve, mode=mode), signature="(x),(y)->(z)"
+    )
+    np.testing.assert_allclose(
+        fn(x_test, y_test),
+        np_convolve1d(x_test, y_test),
+    )
+    benchmark(fn, x_test, y_test)