提交 2ada4b66 authored 作者: Ricardo Vieira's avatar Ricardo Vieira 提交者: Ricardo Vieira

Faster implementation of numba convolve1d

上级 9530ffcc
import numpy as np import numpy as np
from numba.np.arraymath import _get_inner_prod
from pytensor.link.numba.dispatch import numba_funcify from pytensor.link.numba.dispatch import numba_funcify
from pytensor.link.numba.dispatch.basic import numba_njit from pytensor.link.numba.dispatch.basic import numba_njit
...@@ -7,10 +8,63 @@ from pytensor.tensor.signal.conv import Convolve1d ...@@ -7,10 +8,63 @@ from pytensor.tensor.signal.conv import Convolve1d
@numba_funcify.register(Convolve1d) @numba_funcify.register(Convolve1d)
def numba_funcify_Convolve1d(op, node, **kwargs): def numba_funcify_Convolve1d(op, node, **kwargs):
# This specialized version is faster than the overloaded numba np.convolve
mode = op.mode mode = op.mode
a_dtype, b_dtype = node.inputs[0].type.dtype, node.inputs[1].type.dtype
out_dtype = node.outputs[0].type.dtype
innerprod = _get_inner_prod(a_dtype, b_dtype)
@numba_njit if mode == "valid":
def conv1d(data, kernel):
return np.convolve(data, kernel, mode=mode)
return conv1d def valid_convolve1d(x, y):
nx = len(x)
ny = len(y)
if nx < ny:
x, y = y, x
nx, ny = ny, nx
y_flipped = y[::-1]
length = nx - ny + 1
ret = np.empty(length, out_dtype)
for i in range(length):
ret[i] = innerprod(x[i : i + ny], y_flipped)
return ret
return numba_njit(valid_convolve1d)
elif mode == "full":
def full_convolve1d(x, y):
nx = len(x)
ny = len(y)
if nx < ny:
x, y = y, x
nx, ny = ny, nx
y_flipped = y[::-1]
length = nx + ny - 1
ret = np.empty(length, out_dtype)
idx = 0
for i in range(ny - 1):
k = i + 1
ret[idx] = innerprod(x[:k], y_flipped[-k:])
idx = idx + 1
for i in range(nx - ny + 1):
ret[idx] = innerprod(x[i : i + ny], y_flipped)
idx = idx + 1
for i in range(ny - 1):
k = ny - i - 1
ret[idx] = innerprod(x[-k:], y_flipped[:k])
idx = idx + 1
return ret
return numba_njit(full_convolve1d)
else:
raise ValueError(f"Unsupported mode: {mode}")
from functools import partial
import numpy as np import numpy as np
import pytest import pytest
from pytensor.tensor import dmatrix from pytensor import function
from pytensor.tensor import dmatrix, tensor
from pytensor.tensor.signal import convolve1d from pytensor.tensor.signal import convolve1d
from tests.link.numba.test_basic import compare_numba_and_py from tests.link.numba.test_basic import compare_numba_and_py
...@@ -10,13 +13,47 @@ pytestmark = pytest.mark.filterwarnings("error") ...@@ -10,13 +13,47 @@ pytestmark = pytest.mark.filterwarnings("error")
@pytest.mark.parametrize("mode", ["full", "valid", "same"]) @pytest.mark.parametrize("mode", ["full", "valid", "same"])
def test_convolve1d(mode): @pytest.mark.parametrize("x_smaller", (False, True))
def test_convolve1d(x_smaller, mode):
x = dmatrix("x") x = dmatrix("x")
y = dmatrix("y") y = dmatrix("y")
out = convolve1d(x[None], y[:, None], mode=mode) if x_smaller:
out = convolve1d(x[None], y[:, None], mode=mode)
else:
out = convolve1d(y[:, None], x[None], mode=mode)
rng = np.random.default_rng() rng = np.random.default_rng()
test_x = rng.normal(size=(3, 5)) test_x = rng.normal(size=(3, 5))
test_y = rng.normal(size=(7, 11)) test_y = rng.normal(size=(7, 11))
# Blockwise dispatch for numba can't be run on object mode # Blockwise dispatch for numba can't be run on object mode
compare_numba_and_py([x, y], out, [test_x, test_y], eval_obj_mode=False) compare_numba_and_py([x, y], out, [test_x, test_y], eval_obj_mode=False)
@pytest.mark.parametrize("mode", ("full", "valid"), ids=lambda x: f"mode={x}")
@pytest.mark.parametrize("batch", (False, True), ids=lambda x: f"batch={x}")
def test_convolve1d_benchmark(batch, mode, benchmark):
x = tensor(
shape=(
7,
183,
)
if batch
else (183,)
)
y = tensor(shape=(7, 6) if batch else (6,))
out = convolve1d(x, y, mode=mode)
fn = function([x, y], out, mode="NUMBA", trust_input=True)
rng = np.random.default_rng()
x_test = rng.normal(size=(x.type.shape)).astype(x.type.dtype)
y_test = rng.normal(size=(y.type.shape)).astype(y.type.dtype)
np_convolve1d = np.vectorize(
partial(np.convolve, mode=mode), signature="(x),(y)->(z)"
)
np.testing.assert_allclose(
fn(x_test, y_test),
np_convolve1d(x_test, y_test),
)
benchmark(fn, x_test, y_test)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论