Commit 6b4c933d authored by Brandon T. Willard, committed by Brandon T. Willard

Remove old Scan speed tests and add a Cython performance test

Parent 2bf1676e
@@ -48,7 +48,7 @@ from aesara.tensor.random import normal
 from aesara.tensor.random.utils import RandomStream
 from aesara.tensor.shape import Shape_i, reshape, specify_shape
 from aesara.tensor.sharedvar import SharedVariable
-from aesara.tensor.subtensor import Subtensor, inc_subtensor
+from aesara.tensor.subtensor import Subtensor
 from aesara.tensor.type import (
     dcol,
     dmatrix,
@@ -2182,173 +2182,53 @@ def test_cvm_exception_handling(mode):
 @pytest.mark.skipif(
     not config.cxx, reason="G++ not available, so we need to skip this test."
 )
-def test_speed():
-    n_timeit = 50
-
-    # We need the CVM for this speed test
-    r = np.arange(10000).astype(config.floatX).reshape(1000, 10)
-
-    def f_py():
-        for i in range(1, 1000):
-            r[i] += r[i - 1]
-
-    python_duration = timeit.timeit(lambda: f_py(), number=n_timeit)
-
-    r = np.arange(10000).astype(config.floatX).reshape(1000, 10)
-
-    def f_py_iter():
-        r_i = iter(r[1:])
-        r_ii = iter(r[:-1])
-        while True:
-            try:
-                tmp = next(r_i)
-                tmp += next(r_ii)
-            except StopIteration:
-                break
-
-    python_iter_duration = timeit.timeit(lambda: f_py_iter(), number=n_timeit)
-
-    # r = np.arange(10000).astype(config.floatX).reshape(1000, 10)
-    # s_r = matrix()
-    # s_y, updates = scan(
-    #     fn=lambda ri, rii: ri + rii,
-    #     sequences=[s_r[1:]],
-    #     outputs_info=at.constant(r[0]),
-    #     mode=Mode(linker="cvm"),
-    # )
-    # assert not updates
-    #
-    # f_cvm = function([s_r], s_y)
-    #
-    # cvm_duration = timeit.timeit(lambda: f_cvm(r), number=n_timeit)
-
-    # XXX: Why does this take so much longer than Python?!
-    # assert cvm_duration - python_duration < python_duration * 0.15
-
-    r = np.arange(10000).astype(config.floatX).reshape(-1, 10)
-    shared_r = shared(r)
-    s_i = shared(np.array(1))
-    s_rinc = inc_subtensor(
-        shared_r[s_i], shared_r[s_i - 1], tolerate_inplace_aliasing=True
-    )
-
-    f_cvm_shared = function(
-        [],
-        [],
-        updates=OrderedDict([(s_i, s_i + 1), (shared_r, s_rinc)]),
-        mode=Mode(linker="cvm"),
-    )
-    f_cvm_shared._check_for_aliased_inputs = False
-
-    cvm_shared_duration = timeit.timeit(lambda: f_cvm_shared(), number=n_timeit)
-
-    assert cvm_shared_duration < python_duration
-    assert cvm_shared_duration < python_iter_duration
-
-
-@pytest.mark.skipif(
-    not config.cxx, reason="G++ not available, so we need to skip this test."
-)
-def test_speed_rnn():
-    n_timeit = 50
-    L = 10000
-    N = 50
-
-    np.random.seed(2523452)
-    r = np.arange(L * N).astype(config.floatX).reshape(L, N)
-    w = np.random.default_rng(utt.fetch_seed()).random((N, N)).astype(config.floatX)
-
-    def f_py():
-        for i in range(1, L):
-            r[i] = np.tanh(np.dot(r[i - 1], w))
-
-    python_duration = timeit.timeit(lambda: f_py(), number=n_timeit)
-
-    # r = np.arange(L * N).astype(config.floatX).reshape(L, N)
-    # s_r = matrix()
-    # s_y, updates = scan(
-    #     fn=lambda ri, rii: tanh(dot(rii, w)),
-    #     sequences=[s_r[1:]],
-    #     outputs_info=at.constant(r[0]),
-    #     mode=Mode(linker="cvm"),
-    # )
-    # assert not updates
-    #
-    # f_cvm = function([s_r], s_y, mode=Mode(linker="cvm"))
-    #
-    # cvm_duration = timeit.timeit(lambda: f_cvm(r), number=n_timeit)
-
-    # XXX: Why does this take so much longer than Python?!
-    # assert cvm_duration - python_duration < python_duration * 0.15
-
-    r = np.arange(L * N).astype(config.floatX).reshape(L, N)
-    shared_r = shared(r)
-    s_i = shared(1)
-    s_rinc = inc_subtensor(
-        shared_r[s_i],
-        tanh(dot(shared_r[s_i - 1], w)),
-        tolerate_inplace_aliasing=True,
-    )
-    f_cvm_shared = function(
-        [],
-        [],
-        updates=OrderedDict([(s_i, s_i + 1), (shared_r, s_rinc)]),
-        mode=Mode(linker="cvm"),
-    )
-
-    cvm_shared_duration = timeit.timeit(lambda: f_cvm_shared(), number=n_timeit)
-
-    assert cvm_shared_duration < python_duration
-
-
-@pytest.mark.skipif(
-    not config.cxx, reason="G++ not available, so we need to skip this test."
-)
-def test_speed_batchrnn():
-    """
-    This function prints out the speed of recurrent neural network
-    calculations implemented in various ways.
-
-    We force the mode to Mode(linker='cvm'). If you manually
-    change this code to use DebugMode this will test the correctness
-    of the optimizations applied, but generally correctness-testing
-    is not the goal of this test.
-
-    The computation being tested here is a repeated tanh of a matrix-vector
-    multiplication - the heart of an ESN or RNN.
-    """
-    L = 100
-    B = 50
-    N = 400
-
-    np.random.seed(2523452)
-    r = np.arange(B * L * N).astype(config.floatX).reshape(L, B, N)
-    w = np.random.default_rng(utt.fetch_seed()).random((N, N)).astype(config.floatX)
-
-    def ref_fn():
-        for i in range(1, L):
-            r[i] = np.tanh(np.dot(r[i - 1], w))
-
-    python_duration = timeit.timeit(ref_fn, number=20)
-
-    r = np.arange(B * L * N).astype(config.floatX).reshape(L, B, N)
-    shared_r = shared(r)
-    s_i = shared(1)
-    s_rinc = inc_subtensor(
-        shared_r[s_i],
-        tanh(dot(shared_r[s_i - 1], w)),
-        tolerate_inplace_aliasing=True,
-    )
-    f = function(
-        [],
-        [],
-        updates=[(s_i, s_i + 1), (shared_r, s_rinc)],
-        mode=Mode(linker="cvm"),
-    )
-
-    cvm_duration = timeit.timeit(f, number=20)
-
-    assert cvm_duration < python_duration
+def test_cython_performance():
+
+    # This implicitly confirms that the Cython version is being used
+    from aesara.scan import scan_perform_ext  # noqa: F401
+
+    # Python usually out-performs Aesara below 100 iterations
+    N = 200
+    n_timeit = 50
+
+    M = -1 / np.arange(1, 11).astype(config.floatX)
+    r = np.arange(N * 10).astype(config.floatX).reshape(N, 10)
+
+    def f_py():
+        py_out = np.empty((N, 10), dtype=config.floatX)
+        py_out[0] = r[0]
+        for i in range(1, py_out.shape[0]):
+            py_out[i] = r[i] + M * py_out[i - 1]
+        return py_out[1:]
+
+    py_res = f_py()
+
+    s_r = at.as_tensor_variable(r, dtype=config.floatX)
+    s_y, updates = scan(
+        fn=lambda ri, rii, M: ri + M * rii,
+        sequences=[s_r[1:]],
+        non_sequences=[at.as_tensor_variable(M, dtype=config.floatX)],
+        outputs_info=s_r[0],
+        mode=Mode(linker="cvm", optimizer="fast_run"),
+    )
+
+    assert not updates
+
+    f_cvm = function([], s_y, mode="FAST_RUN")
+    f_cvm.trust_input = True
+
+    # Make sure we're actually computing a `Scan`
+    assert any(isinstance(node.op, Scan) for node in f_cvm.maker.fgraph.apply_nodes)
+
+    cvm_res = f_cvm()
+
+    # Make sure the results are the same between the two implementations
+    assert np.allclose(cvm_res, py_res)
+
+    python_duration = timeit.timeit(lambda: f_py(), number=n_timeit)
+    cvm_duration = timeit.timeit(lambda: f_cvm(), number=n_timeit)
+    print(f"python={python_duration}, cvm={cvm_duration}")
+
+    assert cvm_duration <= python_duration
 
 
 @config.change_flags(mode="FAST_COMPILE", compute_test_value="raise")
...
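For readers who want to reproduce the comparison outside the test suite, here is a condensed, self-contained sketch of what the new test does: a NumPy loop and an Aesara `scan` both compute the first-order recurrence y[i] = r[i] + M * y[i - 1] over the rows of r, the results are checked for agreement, and both are timed. This is an illustrative sketch that mirrors the diff above, not part of the commit, and it assumes a working Aesara installation with a C compiler (otherwise the Cython import and `Mode(linker="cvm")` will fail).

```python
import timeit

import numpy as np

import aesara.tensor as at
from aesara import config, function
from aesara.compile.mode import Mode
from aesara.scan.basic import scan

# A successful import confirms that the compiled Cython scan
# implementation is available; it raises ImportError otherwise.
from aesara.scan import scan_perform_ext  # noqa: F401

N = 200  # per the test's comment, Python tends to win below ~100 iterations
M = -1 / np.arange(1, 11).astype(config.floatX)
r = np.arange(N * 10).astype(config.floatX).reshape(N, 10)


def f_py():
    # Reference NumPy implementation of y[i] = r[i] + M * y[i - 1]
    out = np.empty((N, 10), dtype=config.floatX)
    out[0] = r[0]
    for i in range(1, N):
        out[i] = r[i] + M * out[i - 1]
    return out[1:]


# The same recurrence expressed as an Aesara Scan over the rows of r
s_r = at.as_tensor_variable(r)
s_y, _ = scan(
    fn=lambda ri, rii, M: ri + M * rii,
    sequences=[s_r[1:]],
    non_sequences=[at.as_tensor_variable(M)],
    outputs_info=s_r[0],
    mode=Mode(linker="cvm", optimizer="fast_run"),
)
f_cvm = function([], s_y, mode="FAST_RUN")
f_cvm.trust_input = True  # skip per-call input validation in the timed loop

# Both implementations must agree before the timings mean anything
assert np.allclose(f_cvm(), f_py())

print("python:", timeit.timeit(f_py, number=50))
print("cvm:   ", timeit.timeit(f_cvm, number=50))
```

The `trust_input = True` line matters for a micro-benchmark at this scale: it removes per-call Python input checking, so the timing reflects the Scan implementation rather than call overhead.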