提交 3fb06c11 authored 作者: Frederic

use current numpy c api

上级 b01cf487
...@@ -1795,9 +1795,9 @@ class AddSD(gof.op.Op): ...@@ -1795,9 +1795,9 @@ class AddSD(gof.op.Op):
} }
npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * __restrict__ indices = (npy_int32*)%(_indices)s->data; const npy_int32 * __restrict__ indices = (npy_int32*)PyArray_DATA(%(_indices)s);
const dtype_%(_data)s* __restrict__ data = (dtype_%(_data)s*)%(_data)s->data; const dtype_%(_data)s* __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
dtype_%(y)s* ydata = (dtype_%(y)s*)PyArray_DATA(%(y)s); dtype_%(y)s* ydata = (dtype_%(y)s*)PyArray_DATA(%(y)s);
dtype_%(z)s* zdata = (dtype_%(z)s*)PyArray_DATA(%(z)s); dtype_%(z)s* zdata = (dtype_%(z)s*)PyArray_DATA(%(z)s);
...@@ -2983,10 +2983,10 @@ class StructuredDotGradCSC(gof.Op): ...@@ -2983,10 +2983,10 @@ class StructuredDotGradCSC(gof.Op):
if (PyArray_NDIM(%(_indices)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;} if (PyArray_NDIM(%(_indices)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
if (PyArray_NDIM(%(_indptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;} if (PyArray_NDIM(%(_indptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) { if( PyArray_TYPE(%(_indices)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_TYPE(%(_indptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if( PyArray_DIMS(%(_d)s)[1] != PyArray_DIMS(%(_g)s)[1]) if( PyArray_DIMS(%(_d)s)[1] != PyArray_DIMS(%(_g)s)[1])
...@@ -2996,29 +2996,29 @@ class StructuredDotGradCSC(gof.Op): ...@@ -2996,29 +2996,29 @@ class StructuredDotGradCSC(gof.Op):
|| (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0])) || (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0]))
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_g)s)->type_num); %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(_indices)s), PyArray_TYPE(%(_g)s));
} }
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
npy_intp nnz = PyArray_DIMS(%(_indices)s)[0]; npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; //TODO: error checking with this npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; //TODO: error checking with this
npy_intp Sindices = %(_indices)s->strides[0]/PyArray_DESCR(%(_indices)s)->elsize; npy_intp Sindices = PyArray_STRIDES(%(_indices)s)[0]/PyArray_DESCR(%(_indices)s)->elsize;
npy_intp Sindptr = %(_indptr)s->strides[0]/PyArray_DESCR(%(_indptr)s)->elsize; npy_intp Sindptr = PyArray_STRIDES(%(_indptr)s)[0]/PyArray_DESCR(%(_indptr)s)->elsize;
const npy_intp Sd1 = %(_d)s->strides[1]/PyArray_DESCR(%(_d)s)->elsize; const npy_intp Sd1 = PyArray_STRIDES(%(_d)s)[1]/PyArray_DESCR(%(_d)s)->elsize;
const npy_intp Sg1 = %(_g)s->strides[1]/PyArray_DESCR(%(_g)s)->elsize; const npy_intp Sg1 = PyArray_STRIDES(%(_g)s)[1]/PyArray_DESCR(%(_g)s)->elsize;
const npy_intp K = PyArray_DIMS(%(_d)s)[1]; const npy_intp K = PyArray_DIMS(%(_d)s)[1];
const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * __restrict__ indices = (npy_int32 *)PyArray_DATA(%(_indices)s);
// loop over columns // loop over columns
for (npy_int32 j = 0; j < N; ++j) for (npy_int32 j = 0; j < N; ++j)
{ {
// extract j-th row of dense matrix // extract j-th row of dense matrix
const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(%(_d)s->data + %(_d)s->strides[0] * j); const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(PyArray_BYTES(%(_d)s) + PyArray_STRIDES(%(_d)s)[0] * j);
if(j >= PyArray_DIMS(%(_d)s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;} if(j >= PyArray_DIMS(%(_d)s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
// for each non-null value in the sparse column // for each non-null value in the sparse column
...@@ -3028,7 +3028,7 @@ class StructuredDotGradCSC(gof.Op): ...@@ -3028,7 +3028,7 @@ class StructuredDotGradCSC(gof.Op):
npy_int32 i = indices[i_idx * Sindices]; npy_int32 i = indices[i_idx * Sindices];
// extract corresponding row in gradient // extract corresponding row in gradient
const dtype_%(_g)s* __restrict__ g_row = (dtype_%(_g)s*)(%(_g)s->data + %(_g)s->strides[0] * i); const dtype_%(_g)s* __restrict__ g_row = (dtype_%(_g)s*)(PyArray_BYTES(%(_g)s) + PyArray_STRIDES(%(_g)s)[0] * i);
double ip = 0.0; double ip = 0.0;
// make sure that row index is not bigger than actual number of rows // make sure that row index is not bigger than actual number of rows
...@@ -3044,7 +3044,7 @@ class StructuredDotGradCSC(gof.Op): ...@@ -3044,7 +3044,7 @@ class StructuredDotGradCSC(gof.Op):
} }
// write resulting gradient to sparse output // write resulting gradient to sparse output
((dtype_%(_zout)s* __restrict__)(%(_zout)s->data + i_idx * %(_zout)s->strides[0]))[0] = ip; ((dtype_%(_zout)s* __restrict__)(PyArray_BYTES(%(_zout)s) + i_idx * PyArray_STRIDES(%(_zout)s)[0]))[0] = ip;
} }
} }
} }
...@@ -3119,10 +3119,10 @@ class StructuredDotGradCSR(gof.Op): ...@@ -3119,10 +3119,10 @@ class StructuredDotGradCSR(gof.Op):
if (PyArray_NDIM(%(_indices)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;} if (PyArray_NDIM(%(_indices)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
if (PyArray_NDIM(%(_indptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;} if (PyArray_NDIM(%(_indptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) { if( PyArray_TYPE(%(_indices)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_TYPE(%(_indptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if( PyArray_DIMS(%(_d)s)[1] != PyArray_DIMS(%(_g)s)[1]) if( PyArray_DIMS(%(_d)s)[1] != PyArray_DIMS(%(_g)s)[1])
...@@ -3132,7 +3132,7 @@ class StructuredDotGradCSR(gof.Op): ...@@ -3132,7 +3132,7 @@ class StructuredDotGradCSR(gof.Op):
|| (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0])) || (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0]))
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_g)s)->type_num); %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(_indices)s), PyArray_TYPE(%(_g)s));
} }
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
...@@ -3140,16 +3140,16 @@ class StructuredDotGradCSR(gof.Op): ...@@ -3140,16 +3140,16 @@ class StructuredDotGradCSR(gof.Op):
// extract number of rows // extract number of rows
npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; //TODO: error checking with this npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; //TODO: error checking with this
npy_intp Sindices = %(_indices)s->strides[0]/PyArray_DESCR(%(_indices)s)->elsize; npy_intp Sindices = PyArray_STRIDES(%(_indices)s)[0]/PyArray_DESCR(%(_indices)s)->elsize;
npy_intp Sindptr = %(_indptr)s->strides[0]/PyArray_DESCR(%(_indptr)s)->elsize; npy_intp Sindptr = PyArray_STRIDES(%(_indptr)s)[0]/PyArray_DESCR(%(_indptr)s)->elsize;
const npy_intp Sd1 = %(_d)s->strides[1]/PyArray_DESCR(%(_d)s)->elsize; const npy_intp Sd1 = PyArray_STRIDES(%(_d)s)[1]/PyArray_DESCR(%(_d)s)->elsize;
const npy_intp Sg1 = %(_g)s->strides[1]/PyArray_DESCR(%(_g)s)->elsize; const npy_intp Sg1 = PyArray_STRIDES(%(_g)s)[1]/PyArray_DESCR(%(_g)s)->elsize;
const npy_intp K = PyArray_DIMS(%(_d)s)[1]; const npy_intp K = PyArray_DIMS(%(_d)s)[1];
const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * __restrict__ indices = (npy_int32 *)PyArray_DATA(%(_indices)s);
// loop over rows of sparse matrix // loop over rows of sparse matrix
for (npy_int32 i = 0; i < N; ++i) for (npy_int32 i = 0; i < N; ++i)
...@@ -3161,11 +3161,11 @@ class StructuredDotGradCSR(gof.Op): ...@@ -3161,11 +3161,11 @@ class StructuredDotGradCSR(gof.Op):
npy_int32 j = indices[j_idx * Sindices]; npy_int32 j = indices[j_idx * Sindices];
// extract j-th row of dense matrix // extract j-th row of dense matrix
const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(%(_d)s->data + %(_d)s->strides[0] * j); const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(PyArray_BYTES(%(_d)s) + PyArray_STRIDES(%(_d)s)[0] * j);
if(j >= PyArray_DIMS(%(_d)s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;} if(j >= PyArray_DIMS(%(_d)s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
// extract corresponding row in gradient // extract corresponding row in gradient
const dtype_%(_g)s* __restrict__ g_row = (dtype_%(_g)s*)(%(_g)s->data + %(_g)s->strides[0] * i); const dtype_%(_g)s* __restrict__ g_row = (dtype_%(_g)s*)(PyArray_BYTES(%(_g)s) + PyArray_STRIDES(%(_g)s)[0] * i);
double ip = 0.0; double ip = 0.0;
// make sure that row index is not bigger than actual number of rows // make sure that row index is not bigger than actual number of rows
...@@ -3181,7 +3181,7 @@ class StructuredDotGradCSR(gof.Op): ...@@ -3181,7 +3181,7 @@ class StructuredDotGradCSR(gof.Op):
} }
// write resulting gradient to sparse output // write resulting gradient to sparse output
((dtype_%(_zout)s* __restrict__)(%(_zout)s->data + j_idx * %(_zout)s->strides[0]))[0] = ip; ((dtype_%(_zout)s* __restrict__)(PyArray_BYTES(%(_zout)s) + j_idx * PyArray_STRIDES(%(_zout)s)[0]))[0] = ip;
} }
} }
} }
......
...@@ -142,19 +142,19 @@ class StructuredDotCSC(gof.Op): ...@@ -142,19 +142,19 @@ class StructuredDotCSC(gof.Op):
if (PyArray_NDIM(%(a_nrows)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;} if (PyArray_NDIM(%(a_nrows)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;}
if (PyArray_NDIM(%(b)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;} if (PyArray_NDIM(%(b)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (PyArray_DESCR(%(a_val)s)->type_num != %(typenum_a_val)s) { if (PyArray_TYPE(%(a_val)s) != %(typenum_a_val)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for a_val"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for a_val"); %(fail)s;}
if (PyArray_DESCR(%(b)s)->type_num != %(typenum_b)s) { if (PyArray_TYPE(%(b)s) != %(typenum_b)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for b"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for b"); %(fail)s;}
if (PyArray_DESCR(%(a_ind)s)->type_num != NPY_INT32) { if (PyArray_TYPE(%(a_ind)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(a_ptr)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(a_ptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(a_nrows)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(a_nrows)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32"); %(fail)s;}
if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0]) if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0])
...@@ -164,13 +164,13 @@ class StructuredDotCSC(gof.Op): ...@@ -164,13 +164,13 @@ class StructuredDotCSC(gof.Op):
{PyErr_SetString(PyExc_NotImplementedError, "a's number of columns doesn't match b's rows"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a's number of columns doesn't match b's rows"); %(fail)s;}
if ((!%(z)s) if ((!%(z)s)
|| (PyArray_DIMS(%(z)s)[0] != ((npy_int32 *)%(a_nrows)s->data)[0]) || (PyArray_DIMS(%(z)s)[0] != ((npy_int32 *)PyArray_DATA(%(a_nrows)s))[0])
|| (PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(b)s)[1]) || (PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(b)s)[1])
) )
{ {
{Py_XDECREF(%(z)s);} {Py_XDECREF(%(z)s);}
npy_intp dims[] = {0, 0}; npy_intp dims[] = {0, 0};
dims[0] = ((npy_int32 *)%(a_nrows)s->data)[0]; dims[0] = ((npy_int32 *)PyArray_DATA(%(a_nrows)s))[0];
dims[1] = PyArray_DIMS(%(b)s)[1]; dims[1] = PyArray_DIMS(%(b)s)[1];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s); %(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s);
} }
...@@ -182,19 +182,19 @@ class StructuredDotCSC(gof.Op): ...@@ -182,19 +182,19 @@ class StructuredDotCSC(gof.Op):
npy_intp K = PyArray_DIMS(%(b)s)[0]; npy_intp K = PyArray_DIMS(%(b)s)[0];
// strides tell you how many bytes to skip to go to next column/row entry // strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize; npy_intp Szm = PyArray_STRIDES(%(z)s)[0] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Szn = %(z)s->strides[1] / PyArray_DESCR(%(z)s)->elsize; npy_intp Szn = PyArray_STRIDES(%(z)s)[1] / PyArray_DESCR(%(z)s)->elsize;
//npy_intp Sbm = %(b)s->strides[0] / PyArray_DESCR(%(b)s)->elsize; //npy_intp Sbm = PyArray_STRIDES(%(b)s)[0] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sbn = %(b)s->strides[1] / PyArray_DESCR(%(b)s)->elsize; npy_intp Sbn = PyArray_STRIDES(%(b)s)[1] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sval = %(a_val)s->strides[0] / PyArray_DESCR(%(a_val)s)->elsize; npy_intp Sval = PyArray_STRIDES(%(a_val)s)[0] / PyArray_DESCR(%(a_val)s)->elsize;
npy_intp Sind = %(a_ind)s->strides[0] / PyArray_DESCR(%(a_ind)s)->elsize; npy_intp Sind = PyArray_STRIDES(%(a_ind)s)[0] / PyArray_DESCR(%(a_ind)s)->elsize;
npy_intp Sptr = %(a_ptr)s->strides[0] / PyArray_DESCR(%(a_ptr)s)->elsize; npy_intp Sptr = PyArray_STRIDES(%(a_ptr)s)[0] / PyArray_DESCR(%(a_ptr)s)->elsize;
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data; dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)PyArray_DATA(%(z)s);
const dtype_%(a_val)s* __restrict__ Dval = (dtype_%(a_val)s*)%(a_val)s->data; const dtype_%(a_val)s* __restrict__ Dval = (dtype_%(a_val)s*)PyArray_DATA(%(a_val)s);
const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data; const npy_int32 * __restrict__ Dind = (npy_int32*)PyArray_DATA(%(a_ind)s);
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data; const npy_int32 * __restrict__ Dptr = (npy_int32*)PyArray_DATA(%(a_ptr)s);
//npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0]; //npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0];
...@@ -218,7 +218,7 @@ class StructuredDotCSC(gof.Op): ...@@ -218,7 +218,7 @@ class StructuredDotCSC(gof.Op):
for (npy_int32 k = 0; k < K; ++k) for (npy_int32 k = 0; k < K; ++k)
{ {
// get pointer to k-th row of dense matrix // get pointer to k-th row of dense matrix
const dtype_%(b)s* __restrict__ bk = (dtype_%(b)s*)(%(b)s->data + %(b)s->strides[0] * k); const dtype_%(b)s* __restrict__ bk = (dtype_%(b)s*)(PyArray_BYTES(%(b)s) + PyArray_STRIDES(%(b)s)[0] * k);
// loop over sparse column indices through index pointer array // loop over sparse column indices through index pointer array
// (amounts to looping over rows M of sparse matrix) // (amounts to looping over rows M of sparse matrix)
...@@ -229,7 +229,7 @@ class StructuredDotCSC(gof.Op): ...@@ -229,7 +229,7 @@ class StructuredDotCSC(gof.Op):
const dtype_%(a_val)s Amk = Dval[m_idx * Sval]; // actual value at that location const dtype_%(a_val)s Amk = Dval[m_idx * Sval]; // actual value at that location
// pointer to m-th row of the output matrix Z // pointer to m-th row of the output matrix Z
dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * m); dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(PyArray_BYTES(%(z)s) + PyArray_STRIDES(%(z)s)[0] * m);
//RESOLVE: a.shape[0] equals z.shape[0], why is this not an equality constraint? //RESOLVE: a.shape[0] equals z.shape[0], why is this not an equality constraint?
if (m >= PyArray_DIMS(%(z)s)[0]) if (m >= PyArray_DIMS(%(z)s)[0])
...@@ -330,10 +330,10 @@ class StructuredDotCSR(gof.Op): ...@@ -330,10 +330,10 @@ class StructuredDotCSR(gof.Op):
if (PyArray_NDIM(%(a_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;} if (PyArray_NDIM(%(a_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (PyArray_NDIM(%(b)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;} if (PyArray_NDIM(%(b)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (PyArray_DESCR(%(a_ind)s)->type_num != NPY_INT32) { if (PyArray_TYPE(%(a_ind)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(a_ptr)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(a_ptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0]) if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0])
...@@ -358,19 +358,19 @@ class StructuredDotCSR(gof.Op): ...@@ -358,19 +358,19 @@ class StructuredDotCSR(gof.Op):
npy_intp K = PyArray_DIMS(%(b)s)[0]; npy_intp K = PyArray_DIMS(%(b)s)[0];
// strides tell you how many bytes to skip to go to next column/row entry // strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize; npy_intp Szm = PyArray_STRIDES(%(z)s)[0] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Szn = %(z)s->strides[1] / PyArray_DESCR(%(z)s)->elsize; npy_intp Szn = PyArray_STRIDES(%(z)s)[1] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Sbm = %(b)s->strides[0] / PyArray_DESCR(%(b)s)->elsize; npy_intp Sbm = PyArray_STRIDES(%(b)s)[0] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sbn = %(b)s->strides[1] / PyArray_DESCR(%(b)s)->elsize; npy_intp Sbn = PyArray_STRIDES(%(b)s)[1] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sval = %(a_val)s->strides[0] / PyArray_DESCR(%(a_val)s)->elsize; npy_intp Sval = PyArray_STRIDES(%(a_val)s)[0] / PyArray_DESCR(%(a_val)s)->elsize;
npy_intp Sind = %(a_ind)s->strides[0] / PyArray_DESCR(%(a_ind)s)->elsize; npy_intp Sind = PyArray_STRIDES(%(a_ind)s)[0] / PyArray_DESCR(%(a_ind)s)->elsize;
npy_intp Sptr = %(a_ptr)s->strides[0] / PyArray_DESCR(%(a_ptr)s)->elsize; npy_intp Sptr = PyArray_STRIDES(%(a_ptr)s)[0] / PyArray_DESCR(%(a_ptr)s)->elsize;
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data; dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)PyArray_DATA(%(z)s);
const dtype_%(a_val)s* __restrict__ Dval = (dtype_%(a_val)s*)%(a_val)s->data; const dtype_%(a_val)s* __restrict__ Dval = (dtype_%(a_val)s*)PyArray_DATA(%(a_val)s);
const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data; const npy_int32 * __restrict__ Dind = (npy_int32*)PyArray_DATA(%(a_ind)s);
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data; const npy_int32 * __restrict__ Dptr = (npy_int32*)PyArray_DATA(%(a_ptr)s);
//npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0]; //npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0];
...@@ -393,7 +393,7 @@ class StructuredDotCSR(gof.Op): ...@@ -393,7 +393,7 @@ class StructuredDotCSR(gof.Op):
for (npy_int64 m = 0; m < M; ++m) for (npy_int64 m = 0; m < M; ++m)
{ {
// pointer to m-th row of the output matrix Z // pointer to m-th row of the output matrix Z
dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * m); dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(PyArray_BYTES(%(z)s) + PyArray_STRIDES(%(z)s)[0] * m);
// loop over sparse rows indices through index pointer array // loop over sparse rows indices through index pointer array
// (amounts to looping over cols k of sparse matrix) // (amounts to looping over cols k of sparse matrix)
...@@ -403,7 +403,7 @@ class StructuredDotCSR(gof.Op): ...@@ -403,7 +403,7 @@ class StructuredDotCSR(gof.Op):
const dtype_%(a_val)s Amk = Dval[k_idx * Sval]; // actual value at that location const dtype_%(a_val)s Amk = Dval[k_idx * Sval]; // actual value at that location
// get pointer to k-th row of dense matrix // get pointer to k-th row of dense matrix
const dtype_%(b)s* __restrict__ bk = (dtype_%(b)s*)(%(b)s->data + %(b)s->strides[0] * k); const dtype_%(b)s* __restrict__ bk = (dtype_%(b)s*)(PyArray_BYTES(%(b)s) + PyArray_STRIDES(%(b)s)[0] * k);
// loop over final dimension (cols of dense matrix) and perform dot product // loop over final dimension (cols of dense matrix) and perform dot product
for(npy_int32 n = 0; n < N; ++n) for(npy_int32 n = 0; n < N; ++n)
...@@ -566,25 +566,25 @@ class UsmmCscDense(gof.Op): ...@@ -566,25 +566,25 @@ class UsmmCscDense(gof.Op):
if (PyArray_NDIM(%(x_nrows)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;} if (PyArray_NDIM(%(x_nrows)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;}
if (PyArray_NDIM(%(y)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} if (PyArray_NDIM(%(y)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
if (PyArray_DESCR(%(x_val)s)->type_num != %(typenum_x_val)s) { if (PyArray_TYPE(%(x_val)s) != %(typenum_x_val)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x_val"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x_val"); %(fail)s;}
if (PyArray_DESCR(%(y)s)->type_num != %(typenum_y)s) { if (PyArray_TYPE(%(y)s) != %(typenum_y)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y"); %(fail)s;}
if (PyArray_DESCR(%(z)s)->type_num != %(typenum_z)s) { if (PyArray_TYPE(%(z)s) != %(typenum_z)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for z"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for z"); %(fail)s;}
if (PyArray_DESCR(%(alpha)s)->type_num != %(typenum_alpha)s) { if (PyArray_TYPE(%(alpha)s) != %(typenum_alpha)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for alpha"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for alpha"); %(fail)s;}
if (PyArray_DESCR(%(x_ind)s)->type_num != NPY_INT32) { if (PyArray_TYPE(%(x_ind)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "x_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "x_ind dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(x_ptr)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(x_ptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "x_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "x_ptr dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(x_nrows)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(x_nrows)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "x_nrows dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "x_nrows dtype not INT32"); %(fail)s;}
if (PyArray_DIMS(%(x_val)s)[0] != PyArray_DIMS(%(x_ind)s)[0]) if (PyArray_DIMS(%(x_val)s)[0] != PyArray_DIMS(%(x_ind)s)[0])
...@@ -593,7 +593,7 @@ class UsmmCscDense(gof.Op): ...@@ -593,7 +593,7 @@ class UsmmCscDense(gof.Op):
if (PyArray_DIMS(%(x_ptr)s)[0] != PyArray_DIMS(%(y)s)[0]+1) if (PyArray_DIMS(%(x_ptr)s)[0] != PyArray_DIMS(%(y)s)[0]+1)
{PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows"); %(fail)s;}
if (PyArray_DIMS(%(z)s)[0] != ((npy_int32 *)%(x_nrows)s->data)[0] || PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(y)s)[1]) if (PyArray_DIMS(%(z)s)[0] != ((npy_int32 *)PyArray_DATA(%(x_nrows)s))[0] || PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(y)s)[1])
{PyErr_SetString(PyExc_NotImplementedError, "The dimension of the allocated output doesn't match the correct output size."); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "The dimension of the allocated output doesn't match the correct output size."); %(fail)s;}
if (PyArray_SIZE(%(alpha)s) != 1) if (PyArray_SIZE(%(alpha)s) != 1)
...@@ -621,13 +621,13 @@ class UsmmCscDense(gof.Op): ...@@ -621,13 +621,13 @@ class UsmmCscDense(gof.Op):
Py_INCREF(%(zn)s); Py_INCREF(%(zn)s);
} }
else if (!%(zn)s else if (!%(zn)s
|| (PyArray_DIMS(%(zn)s)[0] != ((npy_int32 *)%(x_nrows)s->data)[0]) || (PyArray_DIMS(%(zn)s)[0] != ((npy_int32 *)PyArray_DATA(%(x_nrows)s))[0])
|| (PyArray_DIMS(%(zn)s)[1] != PyArray_DIMS(%(y)s)[1]) || (PyArray_DIMS(%(zn)s)[1] != PyArray_DIMS(%(y)s)[1])
) )
{ {
{Py_XDECREF(%(zn)s);} {Py_XDECREF(%(zn)s);}
npy_intp dims[] = {0, 0}; npy_intp dims[] = {0, 0};
dims[0] = ((npy_int32 *)%(x_nrows)s->data)[0]; dims[0] = ((npy_int32 *)PyArray_DATA(%(x_nrows)s))[0];
dims[1] = PyArray_DIMS(%(y)s)[1]; dims[1] = PyArray_DIMS(%(y)s)[1];
%(zn)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_zn)s); %(zn)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_zn)s);
} }
...@@ -639,17 +639,17 @@ class UsmmCscDense(gof.Op): ...@@ -639,17 +639,17 @@ class UsmmCscDense(gof.Op):
npy_intp K = PyArray_DIMS(%(y)s)[0]; npy_intp K = PyArray_DIMS(%(y)s)[0];
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
const dtype_%(x_val)s* __restrict__ Dval = (dtype_%(x_val)s*)%(x_val)s->data; const dtype_%(x_val)s* __restrict__ Dval = (dtype_%(x_val)s*)PyArray_DATA(%(x_val)s);
const npy_int32 * __restrict__ Dind = (npy_int32*)%(x_ind)s->data; const npy_int32 * __restrict__ Dind = (npy_int32*)PyArray_DATA(%(x_ind)s);
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(x_ptr)s->data; const npy_int32 * __restrict__ Dptr = (npy_int32*)PyArray_DATA(%(x_ptr)s);
const dtype_%(alpha)s alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0]; const dtype_%(alpha)s alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
npy_intp Sz = %(z)s->strides[1] / PyArray_DESCR(%(z)s)->elsize; npy_intp Sz = PyArray_STRIDES(%(z)s)[1] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Szn = %(zn)s->strides[1] / PyArray_DESCR(%(zn)s)->elsize; npy_intp Szn = PyArray_STRIDES(%(zn)s)[1] / PyArray_DESCR(%(zn)s)->elsize;
npy_intp Sval = %(x_val)s->strides[0] / PyArray_DESCR(%(x_val)s)->elsize; npy_intp Sval = PyArray_STRIDES(%(x_val)s)[0] / PyArray_DESCR(%(x_val)s)->elsize;
npy_intp Sind = %(x_ind)s->strides[0] / PyArray_DESCR(%(x_ind)s)->elsize; npy_intp Sind = PyArray_STRIDES(%(x_ind)s)[0] / PyArray_DESCR(%(x_ind)s)->elsize;
npy_intp Sptr = %(x_ptr)s->strides[0] / PyArray_DESCR(%(x_ptr)s)->elsize; npy_intp Sptr = PyArray_STRIDES(%(x_ptr)s)[0] / PyArray_DESCR(%(x_ptr)s)->elsize;
npy_intp Sy = %(y)s->strides[1] / PyArray_DESCR(%(y)s)->elsize; npy_intp Sy = PyArray_STRIDES(%(y)s)[1] / PyArray_DESCR(%(y)s)->elsize;
if (!(%(inplace)s)) if (!(%(inplace)s))
...@@ -669,14 +669,14 @@ class UsmmCscDense(gof.Op): ...@@ -669,14 +669,14 @@ class UsmmCscDense(gof.Op):
const dtype_%(x_val)s Amk = alpha * Dval[m_idx * Sval]; // actual value at that location const dtype_%(x_val)s Amk = alpha * Dval[m_idx * Sval]; // actual value at that location
dtype_%(y)s* y_row = (dtype_%(y)s*)(%(y)s->data + %(y)s->strides[0] * k); dtype_%(y)s* y_row = (dtype_%(y)s*)(PyArray_BYTES(%(y)s) + PyArray_STRIDES(%(y)s)[0] * k);
// axpy expects pointer to the beginning of memory arrays, // axpy expects pointer to the beginning of memory arrays,
// so when the stride is negative, we need to get the // so when the stride is negative, we need to get the
// last element // last element
if (Sy < 0) if (Sy < 0)
y_row += (K - 1) * Sy; y_row += (K - 1) * Sy;
dtype_%(zn)s* z_row = (dtype_%(zn)s*)(%(zn)s->data + %(zn)s->strides[0] * m); dtype_%(zn)s* z_row = (dtype_%(zn)s*)(PyArray_BYTES(%(zn)s) + PyArray_STRIDES(%(zn)s)[0] * m);
if (Szn < 0) if (Szn < 0)
z_row += (N - 1) * Szn; z_row += (N - 1) * Szn;
...@@ -775,16 +775,16 @@ class CSMGradC(gof.Op): ...@@ -775,16 +775,16 @@ class CSMGradC(gof.Op):
if (PyArray_NDIM(%(b_ind)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ind) != 1"); %(fail)s;} if (PyArray_NDIM(%(b_ind)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ind) != 1"); %(fail)s;}
if (PyArray_NDIM(%(b_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ptr) != 1"); %(fail)s;} if (PyArray_NDIM(%(b_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ptr) != 1"); %(fail)s;}
if (PyArray_DESCR(%(a_ind)s)->type_num != NPY_INT32) { if (PyArray_TYPE(%(a_ind)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(a_ptr)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(a_ptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(b_ind)s)->type_num != NPY_INT32) { if (PyArray_TYPE(%(b_ind)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "b_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "b_ind dtype not INT32"); %(fail)s;}
if (PyArray_DESCR(%(b_ptr)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(b_ptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "b_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "b_ptr dtype not INT32"); %(fail)s;}
if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0]) if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0])
...@@ -807,28 +807,28 @@ class CSMGradC(gof.Op): ...@@ -807,28 +807,28 @@ class CSMGradC(gof.Op):
{ {
// sparse array has size MxK, dense KxN, output MxN // sparse array has size MxK, dense KxN, output MxN
npy_intp M = PyArray_DIMS(%(a_ptr)s)[0] - 1; npy_intp M = PyArray_DIMS(%(a_ptr)s)[0] - 1;
npy_intp a_dim_0 = ((npy_int32 *)%(a_dim)s->data)[0]; npy_intp a_dim_0 = ((npy_int32 *)PyArray_DATA(%(a_dim)s))[0];
npy_intp a_dim_1 = ((npy_int32 *)%(a_dim)s->data)[1]; npy_intp a_dim_1 = ((npy_int32 *)PyArray_DATA(%(a_dim)s))[1];
npy_intp sp_dim = (M == a_dim_0)?a_dim_1:a_dim_0; npy_intp sp_dim = (M == a_dim_0)?a_dim_1:a_dim_0;
// strides tell you how many bytes to skip to go to next column/row entry // strides tell you how many bytes to skip to go to next column/row entry
npy_intp Sz = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize; npy_intp Sz = PyArray_STRIDES(%(z)s)[0] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Sa_val = %(a_val)s->strides[0] / PyArray_DESCR(%(a_val)s)->elsize; npy_intp Sa_val = PyArray_STRIDES(%(a_val)s)[0] / PyArray_DESCR(%(a_val)s)->elsize;
npy_intp Sa_ind = %(a_ind)s->strides[0] / PyArray_DESCR(%(a_ind)s)->elsize; npy_intp Sa_ind = PyArray_STRIDES(%(a_ind)s)[0] / PyArray_DESCR(%(a_ind)s)->elsize;
npy_intp Sa_ptr = %(a_ptr)s->strides[0] / PyArray_DESCR(%(a_ptr)s)->elsize; npy_intp Sa_ptr = PyArray_STRIDES(%(a_ptr)s)[0] / PyArray_DESCR(%(a_ptr)s)->elsize;
npy_intp Sb_val = %(b_val)s->strides[0] / PyArray_DESCR(%(b_val)s)->elsize; npy_intp Sb_val = PyArray_STRIDES(%(b_val)s)[0] / PyArray_DESCR(%(b_val)s)->elsize;
npy_intp Sb_ind = %(b_ind)s->strides[0] / PyArray_DESCR(%(b_ind)s)->elsize; npy_intp Sb_ind = PyArray_STRIDES(%(b_ind)s)[0] / PyArray_DESCR(%(b_ind)s)->elsize;
npy_intp Sb_ptr = %(b_ptr)s->strides[0] / PyArray_DESCR(%(b_ptr)s)->elsize; npy_intp Sb_ptr = PyArray_STRIDES(%(b_ptr)s)[0] / PyArray_DESCR(%(b_ptr)s)->elsize;
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data; dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)PyArray_DATA(%(z)s);
const dtype_%(a_val)s* __restrict__ Da_val = (dtype_%(a_val)s*)%(a_val)s->data; const dtype_%(a_val)s* __restrict__ Da_val = (dtype_%(a_val)s*)PyArray_DATA(%(a_val)s);
const npy_int32 * __restrict__ Da_ind = (npy_int32*)%(a_ind)s->data; const npy_int32 * __restrict__ Da_ind = (npy_int32*)PyArray_DATA(%(a_ind)s);
const npy_int32 * __restrict__ Da_ptr = (npy_int32*)%(a_ptr)s->data; const npy_int32 * __restrict__ Da_ptr = (npy_int32*)PyArray_DATA(%(a_ptr)s);
const dtype_%(b_val)s* __restrict__ Db_val = (dtype_%(b_val)s*)%(b_val)s->data; const dtype_%(b_val)s* __restrict__ Db_val = (dtype_%(b_val)s*)PyArray_DATA(%(b_val)s);
const npy_int32 * __restrict__ Db_ind = (npy_int32*)%(b_ind)s->data; const npy_int32 * __restrict__ Db_ind = (npy_int32*)PyArray_DATA(%(b_ind)s);
const npy_int32 * __restrict__ Db_ptr = (npy_int32*)%(b_ptr)s->data; const npy_int32 * __restrict__ Db_ptr = (npy_int32*)PyArray_DATA(%(b_ptr)s);
npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0]; npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0];
...@@ -937,10 +937,10 @@ class MulSDCSC(gof.Op): ...@@ -937,10 +937,10 @@ class MulSDCSC(gof.Op):
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)s;} %(fail)s;}
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) { if( PyArray_TYPE(%(_indices)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_TYPE(%(_indptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s || if (!%(_zout)s ||
...@@ -949,7 +949,7 @@ class MulSDCSC(gof.Op): ...@@ -949,7 +949,7 @@ class MulSDCSC(gof.Op):
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num); PyArray_DIMS(%(_indices)s), PyArray_TYPE(%(_b)s));
if (!%(_zout)s) if (!%(_zout)s)
{ {
PyErr_SetString(PyExc_MemoryError, PyErr_SetString(PyExc_MemoryError,
...@@ -963,13 +963,13 @@ class MulSDCSC(gof.Op): ...@@ -963,13 +963,13 @@ class MulSDCSC(gof.Op):
//TODO: error checking with this //TODO: error checking with this
const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data; const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * const __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * const __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * const __restrict__ indices = (npy_int32 *)PyArray_DATA(%(_indices)s);
dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data; dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)PyArray_DATA(%(_zout)s);
const npy_intp Sb = %(_b)s->strides[0]; const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0];
// loop over columns // loop over columns
for (npy_int32 j = 0; j < N; ++j) for (npy_int32 j = 0; j < N; ++j)
...@@ -981,7 +981,7 @@ class MulSDCSC(gof.Op): ...@@ -981,7 +981,7 @@ class MulSDCSC(gof.Op):
npy_int32 i = indices[i_idx]; npy_int32 i = indices[i_idx];
// extract i-th row of dense matrix // extract i-th row of dense matrix
const dtype_%(_b)s* __restrict__ b_row = (dtype_%(_b)s*)(%(_b)s->data + Sb * i); const dtype_%(_b)s* __restrict__ b_row = (dtype_%(_b)s*)(PyArray_BYTES(%(_b)s) + Sb * i);
// write resulting gradient to sparse output // write resulting gradient to sparse output
zout[i_idx] = data[i_idx] * b_row[j]; zout[i_idx] = data[i_idx] * b_row[j];
...@@ -1053,10 +1053,10 @@ class MulSDCSR(gof.Op): ...@@ -1053,10 +1053,10 @@ class MulSDCSR(gof.Op):
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)s;} %(fail)s;}
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) { if( PyArray_TYPE(%(_indices)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_TYPE(%(_indptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s || if (!%(_zout)s ||
...@@ -1065,7 +1065,7 @@ class MulSDCSR(gof.Op): ...@@ -1065,7 +1065,7 @@ class MulSDCSR(gof.Op):
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num); PyArray_DIMS(%(_indices)s), PyArray_TYPE(%(_b)s));
if (!%(_zout)s) if (!%(_zout)s)
{ {
PyErr_SetString(PyExc_MemoryError, PyErr_SetString(PyExc_MemoryError,
...@@ -1079,19 +1079,19 @@ class MulSDCSR(gof.Op): ...@@ -1079,19 +1079,19 @@ class MulSDCSR(gof.Op):
//TODO: error checking with this //TODO: error checking with this
const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data; const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * const __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * const __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * const __restrict__ indices = (npy_int32 *)PyArray_DATA(%(_indices)s);
dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data; dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)PyArray_DATA(%(_zout)s);
const npy_intp Sb = %(_b)s->strides[0]; const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0];
// loop over columns // loop over columns
for (npy_int32 j = 0; j < N; ++j) for (npy_int32 j = 0; j < N; ++j)
{ {
// extract i-th row of dense matrix // extract i-th row of dense matrix
const dtype_%(_b)s* __restrict__ b_row = (dtype_%(_b)s*)(%(_b)s->data + Sb * j); const dtype_%(_b)s* __restrict__ b_row = (dtype_%(_b)s*)(PyArray_BYTES(%(_b)s) + Sb * j);
// for each non-null value in the sparse column // for each non-null value in the sparse column
for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx) for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx)
...@@ -1209,10 +1209,10 @@ class MulSVCSR(gof.Op): ...@@ -1209,10 +1209,10 @@ class MulSVCSR(gof.Op):
%(fail)s; %(fail)s;
} }
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) { if( PyArray_TYPE(%(_indices)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_TYPE(%(_indptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s if (!%(_zout)s
...@@ -1221,7 +1221,7 @@ class MulSVCSR(gof.Op): ...@@ -1221,7 +1221,7 @@ class MulSVCSR(gof.Op):
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num); PyArray_DIMS(%(_indices)s), PyArray_TYPE(%(_b)s));
} }
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
...@@ -1229,15 +1229,15 @@ class MulSVCSR(gof.Op): ...@@ -1229,15 +1229,15 @@ class MulSVCSR(gof.Op):
//TODO: error checking with this //TODO: error checking with this
const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data; const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * const __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * const __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * const __restrict__ indices = (npy_int32 *)PyArray_DATA(%(_indices)s);
const dtype_%(_b)s* __restrict__ Db = (dtype_%(_b)s*)%(_b)s->data; const dtype_%(_b)s* __restrict__ Db = (dtype_%(_b)s*)PyArray_DATA(%(_b)s);
dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data; dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)PyArray_DATA(%(_zout)s);
const npy_intp Sb = %(_b)s->strides[0] / PyArray_DESCR(%(_b)s)->elsize; const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0] / PyArray_DESCR(%(_b)s)->elsize;
// loop over rows // loop over rows
for (npy_int32 j = 0; j < N; ++j) for (npy_int32 j = 0; j < N; ++j)
...@@ -1359,10 +1359,10 @@ class StructuredAddSVCSR(gof.Op): ...@@ -1359,10 +1359,10 @@ class StructuredAddSVCSR(gof.Op):
%(fail)s; %(fail)s;
} }
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) { if( PyArray_TYPE(%(_indices)s) != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_TYPE(%(_indptr)s) != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s if (!%(_zout)s
...@@ -1371,7 +1371,7 @@ class StructuredAddSVCSR(gof.Op): ...@@ -1371,7 +1371,7 @@ class StructuredAddSVCSR(gof.Op):
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num); PyArray_DIMS(%(_indices)s), PyArray_TYPE(%(_b)s));
if (!%(_zout)s) if (!%(_zout)s)
{ {
PyErr_SetString(PyExc_MemoryError, PyErr_SetString(PyExc_MemoryError,
...@@ -1385,15 +1385,15 @@ class StructuredAddSVCSR(gof.Op): ...@@ -1385,15 +1385,15 @@ class StructuredAddSVCSR(gof.Op):
//TODO: error checking with this //TODO: error checking with this
const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data; const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * const __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * const __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * const __restrict__ indices = (npy_int32 *)PyArray_DATA(%(_indices)s);
const dtype_%(_b)s* __restrict__ Db = (dtype_%(_b)s*)%(_b)s->data; const dtype_%(_b)s* __restrict__ Db = (dtype_%(_b)s*)PyArray_DATA(%(_b)s);
dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data; dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)PyArray_DATA(%(_zout)s);
const npy_intp Sb = %(_b)s->strides[0] / PyArray_DESCR(%(_b)s)->elsize; const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0] / PyArray_DESCR(%(_b)s)->elsize;
// loop over columns // loop over columns
for (npy_int32 j = 0; j < N; ++j) for (npy_int32 j = 0; j < N; ++j)
...@@ -1575,17 +1575,17 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;} ...@@ -1575,17 +1575,17 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;}
if (PyArray_NDIM(%(y)s) != 2) { if (PyArray_NDIM(%(y)s) != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
if (PyArray_DESCR(%(x)s)->type_num != %(typenum_x)s) { if (PyArray_TYPE(%(x)s) != %(typenum_x)s) {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for x"); "Invalid type for x");
%(fail)s;} %(fail)s;}
if (PyArray_DESCR(%(y)s)->type_num != %(typenum_y)s) { if (PyArray_TYPE(%(y)s) != %(typenum_y)s) {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for y"); "Invalid type for y");
%(fail)s;} %(fail)s;}
if (PyArray_DESCR(%(p_data)s)->type_num != %(typenum_p)s) { if (PyArray_TYPE(%(p_data)s) != %(typenum_p)s) {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for pattern"); "Invalid type for pattern");
%(fail)s;} %(fail)s;}
...@@ -1595,7 +1595,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1595,7 +1595,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
"x's number of columns doesn't match y's rows! Note: sampling_dot is different from dot because y is assumed to be transposed."); "x's number of columns doesn't match y's rows! Note: sampling_dot is different from dot because y is assumed to be transposed.");
%(fail)s;} %(fail)s;}
if (PyArray_DIMS(%(y)s)[0] != ((npy_int32 *)%(p_ncols)s->data)[0] || if (PyArray_DIMS(%(y)s)[0] != ((npy_int32 *)PyArray_DATA(%(p_ncols)s))[0] ||
PyArray_DIMS(%(x)s)[0] != (PyArray_DIMS(%(p_ptr)s)[0] - 1)) PyArray_DIMS(%(x)s)[0] != (PyArray_DIMS(%(p_ptr)s)[0] - 1))
{PyErr_SetString(PyExc_NotImplementedError, {PyErr_SetString(PyExc_NotImplementedError,
"The dimension of the pattern and the output must match"); %(fail)s;} "The dimension of the pattern and the output must match"); %(fail)s;}
...@@ -1603,7 +1603,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1603,7 +1603,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
// Allocate output // Allocate output
if (!%(z_data)s if (!%(z_data)s
|| (PyArray_DIMS(%(z_data)s)[0] != PyArray_DIMS(%(p_data)s)[0]) || (PyArray_DIMS(%(z_data)s)[0] != PyArray_DIMS(%(p_data)s)[0])
|| (PyArray_DESCR(%(z_data)s)->type_num != %(typenum_zd)s) || (PyArray_TYPE(%(z_data)s) != %(typenum_zd)s)
|| !(PyArray_ISCONTIGUOUS(%(z_data)s))) || !(PyArray_ISCONTIGUOUS(%(z_data)s)))
{ {
{Py_XDECREF(%(z_data)s);} {Py_XDECREF(%(z_data)s);}
...@@ -1614,7 +1614,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1614,7 +1614,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
} }
if (!%(z_ind)s if (!%(z_ind)s
|| (PyArray_DIMS(%(z_ind)s)[0] != PyArray_DIMS(%(p_ind)s)[0]) || (PyArray_DIMS(%(z_ind)s)[0] != PyArray_DIMS(%(p_ind)s)[0])
|| (PyArray_DESCR(%(z_ind)s)->type_num != %(typenum_zi)s) || (PyArray_TYPE(%(z_ind)s) != %(typenum_zi)s)
|| !(PyArray_ISCONTIGUOUS(%(z_ind)s))) || !(PyArray_ISCONTIGUOUS(%(z_ind)s)))
{ {
{Py_XDECREF(%(z_ind)s);} {Py_XDECREF(%(z_ind)s);}
...@@ -1625,7 +1625,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1625,7 +1625,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
} }
if (!%(z_ptr)s if (!%(z_ptr)s
|| (PyArray_DIMS(%(z_ptr)s)[0] != PyArray_DIMS(%(p_ptr)s)[0]) || (PyArray_DIMS(%(z_ptr)s)[0] != PyArray_DIMS(%(p_ptr)s)[0])
|| (PyArray_DESCR(%(z_ptr)s)->type_num != %(typenum_zp)s) || (PyArray_TYPE(%(z_ptr)s) != %(typenum_zp)s)
|| !(PyArray_ISCONTIGUOUS(%(z_ptr)s))) || !(PyArray_ISCONTIGUOUS(%(z_ptr)s)))
{ {
{Py_XDECREF(%(z_ptr)s);} {Py_XDECREF(%(z_ptr)s);}
...@@ -1642,23 +1642,23 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1642,23 +1642,23 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
npy_intp K = PyArray_DIMS(%(y)s)[1]; npy_intp K = PyArray_DIMS(%(y)s)[1];
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
const dtype_%(x)s* __restrict__ Dx = (dtype_%(x)s*)%(x)s->data; const dtype_%(x)s* __restrict__ Dx = (dtype_%(x)s*)PyArray_DATA(%(x)s);
const dtype_%(y)s* __restrict__ Dy = (dtype_%(y)s*)%(y)s->data; const dtype_%(y)s* __restrict__ Dy = (dtype_%(y)s*)PyArray_DATA(%(y)s);
const dtype_%(p_data)s* __restrict__ Dpd = (dtype_%(p_data)s*)%(p_data)s->data; const dtype_%(p_data)s* __restrict__ Dpd = (dtype_%(p_data)s*)PyArray_DATA(%(p_data)s);
const dtype_%(p_ind)s* __restrict__ Dpi = (dtype_%(p_ind)s*)%(p_ind)s->data; const dtype_%(p_ind)s* __restrict__ Dpi = (dtype_%(p_ind)s*)PyArray_DATA(%(p_ind)s);
const dtype_%(p_ptr)s* __restrict__ Dpp = (dtype_%(p_ptr)s*)%(p_ptr)s->data; const dtype_%(p_ptr)s* __restrict__ Dpp = (dtype_%(p_ptr)s*)PyArray_DATA(%(p_ptr)s);
dtype_%(z_data)s* __restrict__ Dzd = (dtype_%(z_data)s*)%(z_data)s->data; dtype_%(z_data)s* __restrict__ Dzd = (dtype_%(z_data)s*)PyArray_DATA(%(z_data)s);
dtype_%(z_ind)s* __restrict__ Dzi = (dtype_%(z_ind)s*)%(z_ind)s->data; dtype_%(z_ind)s* __restrict__ Dzi = (dtype_%(z_ind)s*)PyArray_DATA(%(z_ind)s);
dtype_%(z_ptr)s* __restrict__ Dzp = (dtype_%(z_ptr)s*)%(z_ptr)s->data; dtype_%(z_ptr)s* __restrict__ Dzp = (dtype_%(z_ptr)s*)PyArray_DATA(%(z_ptr)s);
const npy_intp Sdx = %(x)s->strides[1]/PyArray_DESCR(%(x)s)->elsize; const npy_intp Sdx = PyArray_STRIDES(%(x)s)[1]/PyArray_DESCR(%(x)s)->elsize;
const npy_intp Sdy = %(y)s->strides[1]/PyArray_DESCR(%(y)s)->elsize; const npy_intp Sdy = PyArray_STRIDES(%(y)s)[1]/PyArray_DESCR(%(y)s)->elsize;
const npy_intp Sdpd = %(p_data)s->strides[0] / PyArray_DESCR(%(p_data)s)->elsize; const npy_intp Sdpd = PyArray_STRIDES(%(p_data)s)[0] / PyArray_DESCR(%(p_data)s)->elsize;
const npy_intp Sdpi = %(p_ind)s->strides[0] / PyArray_DESCR(%(p_ind)s)->elsize; const npy_intp Sdpi = PyArray_STRIDES(%(p_ind)s)[0] / PyArray_DESCR(%(p_ind)s)->elsize;
const npy_intp Sdpp = %(p_ptr)s->strides[0] / PyArray_DESCR(%(p_ptr)s)->elsize; const npy_intp Sdpp = PyArray_STRIDES(%(p_ptr)s)[0] / PyArray_DESCR(%(p_ptr)s)->elsize;
const npy_intp Sdzd = %(z_data)s->strides[0] / PyArray_DESCR(%(z_data)s)->elsize; const npy_intp Sdzd = PyArray_STRIDES(%(z_data)s)[0] / PyArray_DESCR(%(z_data)s)->elsize;
const npy_intp Sdzi = %(z_ind)s->strides[0] / PyArray_DESCR(%(z_ind)s)->elsize; const npy_intp Sdzi = PyArray_STRIDES(%(z_ind)s)[0] / PyArray_DESCR(%(z_ind)s)->elsize;
const npy_intp Sdzp = %(z_ptr)s->strides[0] / PyArray_DESCR(%(z_ptr)s)->elsize; const npy_intp Sdzp = PyArray_STRIDES(%(z_ptr)s)[0] / PyArray_DESCR(%(z_ptr)s)->elsize;
memcpy(Dzi, Dpi, PyArray_DIMS(%(p_ind)s)[0]*sizeof(dtype_%(p_ind)s)); memcpy(Dzi, Dpi, PyArray_DIMS(%(p_ind)s)[0]*sizeof(dtype_%(p_ind)s));
memcpy(Dzp, Dpp, PyArray_DIMS(%(p_ptr)s)[0]*sizeof(dtype_%(p_ptr)s)); memcpy(Dzp, Dpp, PyArray_DIMS(%(p_ptr)s)[0]*sizeof(dtype_%(p_ptr)s));
...@@ -1667,9 +1667,9 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1667,9 +1667,9 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) { for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
const npy_int32 n = Dpi[n_idx * Sdpi]; // row index of non-null value for column K const npy_int32 n = Dpi[n_idx * Sdpi]; // row index of non-null value for column K
const dtype_%(x)s* x_row = (dtype_%(x)s*)(%(x)s->data + %(x)s->strides[0] * m); const dtype_%(x)s* x_row = (dtype_%(x)s*)(PyArray_DATA(%(x)s) + PyArray_STRIDES(%(x)s)[0] * m);
const dtype_%(y)s* y_col = (dtype_%(y)s*)(%(y)s->data + %(y)s->strides[0] * n); const dtype_%(y)s* y_col = (dtype_%(y)s*)(PyArray_DATA(%(y)s) + PyArray_STRIDES(%(y)s)[0] * n);
Dzd[n_idx * Sdzd] = Dpd[n_idx * Sdpd] * %(cdot)s((int*)&K, (const %(conv_type)s*)x_row, (int*)&Sdx, (const %(conv_type)s*)y_col, (int*)&Sdy); Dzd[n_idx * Sdzd] = Dpd[n_idx * Sdpd] * %(cdot)s((int*)&K, (const %(conv_type)s*)x_row, (int*)&Sdx, (const %(conv_type)s*)y_col, (int*)&Sdy);
} }
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论