提交 20d65357 authored 作者: James Bergstra's avatar James Bergstra

Merge pull request #1202 from hunse/master

CGemv now uses vector-vector dot product when appropriate
......@@ -492,11 +492,19 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
{
if (PyArray_DESCR(%(xx)s)->type_num == NPY_FLOAT)
{
//fprintf(stderr, "B %%i %%i %%i %%i\\n",
// Nz0, Nz1, Sz0, Sz1);
float alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
//fprintf(stderr, "alpha=%%f\\n", alpha);
//fprintf(stderr, "sx sy %%i %%i\\n", Sx, Sy);
// Check for vector-vector dot (Nx0 == 1). The code may work
// for Sx1 != 1 as well, but has not been tested for this case,
// so Sx1 == 1 is required for safety.
if (Nx0 == 1 && Sx1 == 1)
{
zz_data[0] = fbeta*zz_data[0] + alpha*sdot_(&Nx1,
(float*)(PyArray_DATA(%(xx)s)), &Sx1,
(float*)yy_data, &Sy);
}
else
{
sgemv_(&TRANS, &Nx1, &Nx0,
&alpha,
(float*)(PyArray_DATA(%(xx)s)), &Sx0,
......@@ -504,9 +512,22 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
&fbeta,
(float*)zz_data, &Sz);
}
}
else if (PyArray_DESCR(%(xx)s)->type_num == NPY_DOUBLE)
{
double alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
// Check for vector-vector dot (Nx0 == 1). The code may work
// for Sx1 != 1 as well, but has not been tested for this case,
// so Sx1 == 1 is required for safety.
if (Nx0 == 1 && Sx1 == 1)
{
zz_data[0] = dbeta*zz_data[0] + alpha*ddot_(&Nx1,
(double*)(PyArray_DATA(%(xx)s)), &Sx1,
(double*)yy_data, &Sy);
}
else
{
dgemv_(&TRANS, &Nx1, &Nx0,
&alpha,
(double*)(PyArray_DATA(%(xx)s)), &Sx0,
......@@ -514,6 +535,7 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
&dbeta,
(double*)zz_data, &Sz);
}
}
else
{
PyErr_SetString(PyExc_AssertionError,
......@@ -556,7 +578,7 @@ class CGemv(BaseBLAS, Gemv):
return code
def c_code_cache_version(self):
return (9,)
return (10,)
@local_optimizer([gemv_inplace, gemv_no_inplace])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论