提交 14af4399 作者: hunse

ENH: CGemv now uses vector-vector dot when possible for increased speed (fixes issue #1145)

上级 95adacbd
......@@ -497,6 +497,16 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
float alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
//fprintf(stderr, "alpha=%%f\\n", alpha);
//fprintf(stderr, "sx sy %%i %%i\\n", Sx, Sy);
if (Nx0 == 1 && Sx1 == 1)
{
// Special case: vector-vector dot
zz_data[0] = fbeta*zz_data[0] + alpha*sdot_(&Nx1,
(float*)(PyArray_DATA(%(xx)s)), &Sx1,
(float*)yy_data, &Sy);
}
else
{
sgemv_(&TRANS, &Nx1, &Nx0,
&alpha,
(float*)(PyArray_DATA(%(xx)s)), &Sx0,
......@@ -504,9 +514,20 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
&fbeta,
(float*)zz_data, &Sz);
}
}
else if (PyArray_DESCR(%(xx)s)->type_num == NPY_DOUBLE)
{
double alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
if (Nx0 == 1 && Sx1 == 1)
{
// Special case: vector-vector dot
zz_data[0] = dbeta*zz_data[0] + alpha*ddot_(&Nx1,
(double*)(PyArray_DATA(%(xx)s)), &Sx1,
(double*)yy_data, &Sy);
}
else
{
dgemv_(&TRANS, &Nx1, &Nx0,
&alpha,
(double*)(PyArray_DATA(%(xx)s)), &Sx0,
......@@ -514,6 +535,7 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
&dbeta,
(double*)zz_data, &Sz);
}
}
else
{
PyErr_SetString(PyExc_AssertionError,
......@@ -556,7 +578,7 @@ class CGemv(BaseBLAS, Gemv):
return code
def c_code_cache_version(self):
- return (9,)
+ return (10,)
@local_optimizer([gemv_inplace, gemv_no_inplace])
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论