提交 2ada4b66 authored 作者: Ricardo Vieira's avatar Ricardo Vieira 提交者: Ricardo Vieira

Faster implementation of numba convolve1d

上级 9530ffcc
import numpy as np
from numba.np.arraymath import _get_inner_prod
from pytensor.link.numba.dispatch import numba_funcify
from pytensor.link.numba.dispatch.basic import numba_njit
......@@ -7,10 +8,63 @@ from pytensor.tensor.signal.conv import Convolve1d
@numba_funcify.register(Convolve1d)
def numba_funcify_Convolve1d(op, node, **kwargs):
# This specialized version is faster than the overloaded numba np.convolve
mode = op.mode
a_dtype, b_dtype = node.inputs[0].type.dtype, node.inputs[1].type.dtype
out_dtype = node.outputs[0].type.dtype
innerprod = _get_inner_prod(a_dtype, b_dtype)
@numba_njit
def conv1d(data, kernel):
return np.convolve(data, kernel, mode=mode)
if mode == "valid":
return conv1d
def valid_convolve1d(x, y):
nx = len(x)
ny = len(y)
if nx < ny:
x, y = y, x
nx, ny = ny, nx
y_flipped = y[::-1]
length = nx - ny + 1
ret = np.empty(length, out_dtype)
for i in range(length):
ret[i] = innerprod(x[i : i + ny], y_flipped)
return ret
return numba_njit(valid_convolve1d)
elif mode == "full":
def full_convolve1d(x, y):
nx = len(x)
ny = len(y)
if nx < ny:
x, y = y, x
nx, ny = ny, nx
y_flipped = y[::-1]
length = nx + ny - 1
ret = np.empty(length, out_dtype)
idx = 0
for i in range(ny - 1):
k = i + 1
ret[idx] = innerprod(x[:k], y_flipped[-k:])
idx = idx + 1
for i in range(nx - ny + 1):
ret[idx] = innerprod(x[i : i + ny], y_flipped)
idx = idx + 1
for i in range(ny - 1):
k = ny - i - 1
ret[idx] = innerprod(x[-k:], y_flipped[:k])
idx = idx + 1
return ret
return numba_njit(full_convolve1d)
else:
raise ValueError(f"Unsupported mode: {mode}")
from functools import partial
import numpy as np
import pytest
from pytensor.tensor import dmatrix
from pytensor import function
from pytensor.tensor import dmatrix, tensor
from pytensor.tensor.signal import convolve1d
from tests.link.numba.test_basic import compare_numba_and_py
......@@ -10,13 +13,47 @@ pytestmark = pytest.mark.filterwarnings("error")
@pytest.mark.parametrize("mode", ["full", "valid", "same"])
def test_convolve1d(mode):
@pytest.mark.parametrize("x_smaller", (False, True))
def test_convolve1d(x_smaller, mode):
x = dmatrix("x")
y = dmatrix("y")
if x_smaller:
out = convolve1d(x[None], y[:, None], mode=mode)
else:
out = convolve1d(y[:, None], x[None], mode=mode)
rng = np.random.default_rng()
test_x = rng.normal(size=(3, 5))
test_y = rng.normal(size=(7, 11))
# Blockwise dispatch for numba can't be run on object mode
compare_numba_and_py([x, y], out, [test_x, test_y], eval_obj_mode=False)
@pytest.mark.parametrize("mode", ("full", "valid"), ids=lambda x: f"mode={x}")
@pytest.mark.parametrize("batch", (False, True), ids=lambda x: f"batch={x}")
def test_convolve1d_benchmark(batch, mode, benchmark):
x = tensor(
shape=(
7,
183,
)
if batch
else (183,)
)
y = tensor(shape=(7, 6) if batch else (6,))
out = convolve1d(x, y, mode=mode)
fn = function([x, y], out, mode="NUMBA", trust_input=True)
rng = np.random.default_rng()
x_test = rng.normal(size=(x.type.shape)).astype(x.type.dtype)
y_test = rng.normal(size=(y.type.shape)).astype(y.type.dtype)
np_convolve1d = np.vectorize(
partial(np.convolve, mode=mode), signature="(x),(y)->(z)"
)
np.testing.assert_allclose(
fn(x_test, y_test),
np_convolve1d(x_test, y_test),
)
benchmark(fn, x_test, y_test)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论