Use memset to initialize the output memory block to 0 instead of looping over the matrix dimensions (resulted in a 3x speedup)
上级 936bfcf6
...@@ -850,13 +850,7 @@ class StructuredDotCSC(gof.Op): ...@@ -850,13 +850,7 @@ class StructuredDotCSC(gof.Op):
//npy_intp nnz = %(a_ind)s->dimensions[0]; //npy_intp nnz = %(a_ind)s->dimensions[0];
//clear the output array //clear the output array
for (npy_intp m = 0; m < M; ++m) memset(Dz, 0, M*N*sizeof(dtype_%(z)s));
{
for (npy_intp n = 0; n < N; ++n)
{
Dz[m*Szm + n*Szn] = 0.0;
}
}
//iterate over the sparse array, making the most of an entry wherever we find it. //iterate over the sparse array, making the most of an entry wherever we find it.
// //
...@@ -879,6 +873,7 @@ class StructuredDotCSC(gof.Op): ...@@ -879,6 +873,7 @@ class StructuredDotCSC(gof.Op):
// loop over sparse column indices through index pointer array // loop over sparse column indices through index pointer array
// (amounts to looping over rows M of sparse matrix) // (amounts to looping over rows M of sparse matrix)
for (npy_int32 m_idx = Dptr[k * Sptr]; m_idx < Dptr[(k+1) * Sptr]; ++m_idx) for (npy_int32 m_idx = Dptr[k * Sptr]; m_idx < Dptr[(k+1) * Sptr]; ++m_idx)
{ {
npy_int32 m = Dind[m_idx * Sind]; // row index of non-null value for column K npy_int32 m = Dind[m_idx * Sind]; // row index of non-null value for column K
...@@ -901,8 +896,6 @@ class StructuredDotCSC(gof.Op): ...@@ -901,8 +896,6 @@ class StructuredDotCSC(gof.Op):
} }
"""% dict(locals(), **sub) """% dict(locals(), **sub)
# print rval
return rval return rval
sd_csc = StructuredDotCSC() sd_csc = StructuredDotCSC()
...@@ -989,13 +982,7 @@ class StructuredDotCSR(gof.Op): ...@@ -989,13 +982,7 @@ class StructuredDotCSR(gof.Op):
//npy_intp nnz = %(a_ind)s->dimensions[0]; //npy_intp nnz = %(a_ind)s->dimensions[0];
//clear the output array //clear the output array
for (npy_intp m = 0; m < M; ++m) memset(Dz, 0, M*N*sizeof(dtype_%(z)s));
{
for (npy_intp n = 0; n < N; ++n)
{
Dz[m*Szm + n*Szn] = 0.0;
}
}
//iterate over the sparse array, making the most of an entry wherever we find it. //iterate over the sparse array, making the most of an entry wherever we find it.
// Normal matrix matrix multiply: // Normal matrix matrix multiply:
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论