Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
3fb06c11
提交
3fb06c11
authored
10月 25, 2013
作者:
Frederic
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
use current numpy c api
上级
b01cf487
显示空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
166 行增加
和
166 行删除
+166
-166
basic.py
theano/sparse/basic.py
+27
-27
opt.py
theano/sparse/opt.py
+139
-139
没有找到文件。
theano/sparse/basic.py
浏览文件 @
3fb06c11
...
@@ -1795,9 +1795,9 @@ class AddSD(gof.op.Op):
...
@@ -1795,9 +1795,9 @@ class AddSD(gof.op.Op):
}
}
npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const npy_int32 * __restrict__ indptr = (npy_int32 *)
%(_indptr)
s->data
;
const npy_int32 * __restrict__ indptr = (npy_int32 *)
PyArray_DATA(
%(_indptr)
s)
;
const npy_int32 * __restrict__ indices = (npy_int32*)
%(_indices)
s->data
;
const npy_int32 * __restrict__ indices = (npy_int32*)
PyArray_DATA(
%(_indices)
s)
;
const dtype_
%(_data)
s* __restrict__ data = (dtype_
%(_data)
s*)
%(_data)
s->data
;
const dtype_
%(_data)
s* __restrict__ data = (dtype_
%(_data)
s*)
PyArray_DATA(
%(_data)
s)
;
dtype_
%(y)
s* ydata = (dtype_
%(y)
s*)PyArray_DATA(
%(y)
s);
dtype_
%(y)
s* ydata = (dtype_
%(y)
s*)PyArray_DATA(
%(y)
s);
dtype_
%(z)
s* zdata = (dtype_
%(z)
s*)PyArray_DATA(
%(z)
s);
dtype_
%(z)
s* zdata = (dtype_
%(z)
s*)PyArray_DATA(
%(z)
s);
...
@@ -2983,10 +2983,10 @@ class StructuredDotGradCSC(gof.Op):
...
@@ -2983,10 +2983,10 @@ class StructuredDotGradCSC(gof.Op):
if (PyArray_NDIM(
%(_indices)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(_indices)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(_indptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(_indptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indices)
s)->type_num
!= NPY_INT32) {
if( PyArray_
TYPE(
%(_indices)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indptr)
s)->type_num
!= NPY_INT32)
if( PyArray_
TYPE(
%(_indptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
if( PyArray_DIMS(
%(_d)
s)[1] != PyArray_DIMS(
%(_g)
s)[1])
if( PyArray_DIMS(
%(_d)
s)[1] != PyArray_DIMS(
%(_g)
s)[1])
...
@@ -2996,29 +2996,29 @@ class StructuredDotGradCSC(gof.Op):
...
@@ -2996,29 +2996,29 @@ class StructuredDotGradCSC(gof.Op):
|| (PyArray_DIMS(
%(_zout)
s)[0] != PyArray_DIMS(
%(_indices)
s)[0]))
|| (PyArray_DIMS(
%(_zout)
s)[0] != PyArray_DIMS(
%(_indices)
s)[0]))
{
{
Py_XDECREF(
%(_zout)
s);
Py_XDECREF(
%(_zout)
s);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(
%(_indices)
s), PyArray_
DESCR(
%(_g)
s)->type_num
);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(
%(_indices)
s), PyArray_
TYPE(
%(_g)
s)
);
}
}
{ //makes it compile even though labels jump over variable definitions.
{ //makes it compile even though labels jump over variable definitions.
npy_intp nnz = PyArray_DIMS(
%(_indices)
s)[0];
npy_intp nnz = PyArray_DIMS(
%(_indices)
s)[0];
npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1; //TODO: error checking with this
npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1; //TODO: error checking with this
npy_intp Sindices =
%(_indices)
s->strides
[0]/PyArray_DESCR(
%(_indices)
s)->elsize;
npy_intp Sindices =
PyArray_STRIDES(
%(_indices)
s)
[0]/PyArray_DESCR(
%(_indices)
s)->elsize;
npy_intp Sindptr =
%(_indptr)
s->strides
[0]/PyArray_DESCR(
%(_indptr)
s)->elsize;
npy_intp Sindptr =
PyArray_STRIDES(
%(_indptr)
s)
[0]/PyArray_DESCR(
%(_indptr)
s)->elsize;
const npy_intp Sd1 =
%(_d)
s->strides
[1]/PyArray_DESCR(
%(_d)
s)->elsize;
const npy_intp Sd1 =
PyArray_STRIDES(
%(_d)
s)
[1]/PyArray_DESCR(
%(_d)
s)->elsize;
const npy_intp Sg1 =
%(_g)
s->strides
[1]/PyArray_DESCR(
%(_g)
s)->elsize;
const npy_intp Sg1 =
PyArray_STRIDES(
%(_g)
s)
[1]/PyArray_DESCR(
%(_g)
s)->elsize;
const npy_intp K = PyArray_DIMS(
%(_d)
s)[1];
const npy_intp K = PyArray_DIMS(
%(_d)
s)[1];
const npy_int32 * __restrict__ indptr = (npy_int32 *)
%(_indptr)
s->data
;
const npy_int32 * __restrict__ indptr = (npy_int32 *)
PyArray_DATA(
%(_indptr)
s)
;
const npy_int32 * __restrict__ indices = (npy_int32 *)
%(_indices)
s->data
;
const npy_int32 * __restrict__ indices = (npy_int32 *)
PyArray_DATA(
%(_indices)
s)
;
// loop over columns
// loop over columns
for (npy_int32 j = 0; j < N; ++j)
for (npy_int32 j = 0; j < N; ++j)
{
{
// extract j-th row of dense matrix
// extract j-th row of dense matrix
const dtype_
%(_d)
s* __restrict__ d_row = (dtype_
%(_d)
s*)(
%(_d)
s->data +
%(_d)
s->strides
[0] * j);
const dtype_
%(_d)
s* __restrict__ d_row = (dtype_
%(_d)
s*)(
PyArray_BYTES(
%(_d)
s) + PyArray_STRIDES(
%(_d)
s)
[0] * j);
if(j >= PyArray_DIMS(
%(_d)
s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G");
%(fail)
s;}
if(j >= PyArray_DIMS(
%(_d)
s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G");
%(fail)
s;}
// for each non-null value in the sparse column
// for each non-null value in the sparse column
...
@@ -3028,7 +3028,7 @@ class StructuredDotGradCSC(gof.Op):
...
@@ -3028,7 +3028,7 @@ class StructuredDotGradCSC(gof.Op):
npy_int32 i = indices[i_idx * Sindices];
npy_int32 i = indices[i_idx * Sindices];
// extract corresponding row in gradient
// extract corresponding row in gradient
const dtype_
%(_g)
s* __restrict__ g_row = (dtype_
%(_g)
s*)(
%(_g)
s->data +
%(_g)
s->strides
[0] * i);
const dtype_
%(_g)
s* __restrict__ g_row = (dtype_
%(_g)
s*)(
PyArray_BYTES(
%(_g)
s) + PyArray_STRIDES(
%(_g)
s)
[0] * i);
double ip = 0.0;
double ip = 0.0;
// make sure that row index is not bigger than actual number of rows
// make sure that row index is not bigger than actual number of rows
...
@@ -3044,7 +3044,7 @@ class StructuredDotGradCSC(gof.Op):
...
@@ -3044,7 +3044,7 @@ class StructuredDotGradCSC(gof.Op):
}
}
// write resulting gradient to sparse output
// write resulting gradient to sparse output
((dtype_
%(_zout)
s* __restrict__)(
%(_zout)
s->data + i_idx *
%(_zout)
s->strides
[0]))[0] = ip;
((dtype_
%(_zout)
s* __restrict__)(
PyArray_BYTES(
%(_zout)
s) + i_idx * PyArray_STRIDES(
%(_zout)
s)
[0]))[0] = ip;
}
}
}
}
}
}
...
@@ -3119,10 +3119,10 @@ class StructuredDotGradCSR(gof.Op):
...
@@ -3119,10 +3119,10 @@ class StructuredDotGradCSR(gof.Op):
if (PyArray_NDIM(
%(_indices)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(_indices)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(_indptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(_indptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indices)
s)->type_num
!= NPY_INT32) {
if( PyArray_
TYPE(
%(_indices)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indptr)
s)->type_num
!= NPY_INT32)
if( PyArray_
TYPE(
%(_indptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
if( PyArray_DIMS(
%(_d)
s)[1] != PyArray_DIMS(
%(_g)
s)[1])
if( PyArray_DIMS(
%(_d)
s)[1] != PyArray_DIMS(
%(_g)
s)[1])
...
@@ -3132,7 +3132,7 @@ class StructuredDotGradCSR(gof.Op):
...
@@ -3132,7 +3132,7 @@ class StructuredDotGradCSR(gof.Op):
|| (PyArray_DIMS(
%(_zout)
s)[0] != PyArray_DIMS(
%(_indices)
s)[0]))
|| (PyArray_DIMS(
%(_zout)
s)[0] != PyArray_DIMS(
%(_indices)
s)[0]))
{
{
Py_XDECREF(
%(_zout)
s);
Py_XDECREF(
%(_zout)
s);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(
%(_indices)
s), PyArray_
DESCR(
%(_g)
s)->type_num
);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(
%(_indices)
s), PyArray_
TYPE(
%(_g)
s)
);
}
}
{ //makes it compile even though labels jump over variable definitions.
{ //makes it compile even though labels jump over variable definitions.
...
@@ -3140,16 +3140,16 @@ class StructuredDotGradCSR(gof.Op):
...
@@ -3140,16 +3140,16 @@ class StructuredDotGradCSR(gof.Op):
// extract number of rows
// extract number of rows
npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1; //TODO: error checking with this
npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1; //TODO: error checking with this
npy_intp Sindices =
%(_indices)
s->strides
[0]/PyArray_DESCR(
%(_indices)
s)->elsize;
npy_intp Sindices =
PyArray_STRIDES(
%(_indices)
s)
[0]/PyArray_DESCR(
%(_indices)
s)->elsize;
npy_intp Sindptr =
%(_indptr)
s->strides
[0]/PyArray_DESCR(
%(_indptr)
s)->elsize;
npy_intp Sindptr =
PyArray_STRIDES(
%(_indptr)
s)
[0]/PyArray_DESCR(
%(_indptr)
s)->elsize;
const npy_intp Sd1 =
%(_d)
s->strides
[1]/PyArray_DESCR(
%(_d)
s)->elsize;
const npy_intp Sd1 =
PyArray_STRIDES(
%(_d)
s)
[1]/PyArray_DESCR(
%(_d)
s)->elsize;
const npy_intp Sg1 =
%(_g)
s->strides
[1]/PyArray_DESCR(
%(_g)
s)->elsize;
const npy_intp Sg1 =
PyArray_STRIDES(
%(_g)
s)
[1]/PyArray_DESCR(
%(_g)
s)->elsize;
const npy_intp K = PyArray_DIMS(
%(_d)
s)[1];
const npy_intp K = PyArray_DIMS(
%(_d)
s)[1];
const npy_int32 * __restrict__ indptr = (npy_int32 *)
%(_indptr)
s->data
;
const npy_int32 * __restrict__ indptr = (npy_int32 *)
PyArray_DATA(
%(_indptr)
s)
;
const npy_int32 * __restrict__ indices = (npy_int32 *)
%(_indices)
s->data
;
const npy_int32 * __restrict__ indices = (npy_int32 *)
PyArray_DATA(
%(_indices)
s)
;
// loop over columns of sparse matrix
// loop over columns of sparse matrix
for (npy_int32 i = 0; i < N; ++i)
for (npy_int32 i = 0; i < N; ++i)
...
@@ -3161,11 +3161,11 @@ class StructuredDotGradCSR(gof.Op):
...
@@ -3161,11 +3161,11 @@ class StructuredDotGradCSR(gof.Op):
npy_int32 j = indices[j_idx * Sindices];
npy_int32 j = indices[j_idx * Sindices];
// extract j-th row of dense matrix
// extract j-th row of dense matrix
const dtype_
%(_d)
s* __restrict__ d_row = (dtype_
%(_d)
s*)(
%(_d)
s->data +
%(_d)
s->strides
[0] * j);
const dtype_
%(_d)
s* __restrict__ d_row = (dtype_
%(_d)
s*)(
PyArray_BYTES(
%(_d)
s) + PyArray_STRIDES(
%(_d)
s)
[0] * j);
if(j >= PyArray_DIMS(
%(_d)
s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G");
%(fail)
s;}
if(j >= PyArray_DIMS(
%(_d)
s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G");
%(fail)
s;}
// extract corresponding row in gradient
// extract corresponding row in gradient
const dtype_
%(_g)
s* __restrict__ g_row = (dtype_
%(_g)
s*)(
%(_g)
s->data +
%(_g)
s->strides
[0] * i);
const dtype_
%(_g)
s* __restrict__ g_row = (dtype_
%(_g)
s*)(
PyArray_BYTES(
%(_g)
s) + PyArray_STRIDES(
%(_g)
s)
[0] * i);
double ip = 0.0;
double ip = 0.0;
// make sure that row index is not bigger than actual number of rows
// make sure that row index is not bigger than actual number of rows
...
@@ -3181,7 +3181,7 @@ class StructuredDotGradCSR(gof.Op):
...
@@ -3181,7 +3181,7 @@ class StructuredDotGradCSR(gof.Op):
}
}
// write resulting gradient to sparse output
// write resulting gradient to sparse output
((dtype_
%(_zout)
s* __restrict__)(
%(_zout)
s->data + j_idx *
%(_zout)
s->strides
[0]))[0] = ip;
((dtype_
%(_zout)
s* __restrict__)(
PyArray_BYTES(
%(_zout)
s) + j_idx * PyArray_STRIDES(
%(_zout)
s)
[0]))[0] = ip;
}
}
}
}
}
}
...
...
theano/sparse/opt.py
浏览文件 @
3fb06c11
...
@@ -142,19 +142,19 @@ class StructuredDotCSC(gof.Op):
...
@@ -142,19 +142,19 @@ class StructuredDotCSC(gof.Op):
if (PyArray_NDIM(
%(a_nrows)
s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0");
%(fail)
s;}
if (PyArray_NDIM(
%(a_nrows)
s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0");
%(fail)
s;}
if (PyArray_NDIM(
%(b)
s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2");
%(fail)
s;}
if (PyArray_NDIM(
%(b)
s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_val)
s)->type_num
!=
%(typenum_a_val)
s) {
if (PyArray_
TYPE(
%(a_val)
s)
!=
%(typenum_a_val)
s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for a_val");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for a_val");
%(fail)
s;}
if (PyArray_
DESCR(
%(b)
s)->type_num
!=
%(typenum_b)
s) {
if (PyArray_
TYPE(
%(b)
s)
!=
%(typenum_b)
s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for b");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for b");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_ind)
s)->type_num
!= NPY_INT32) {
if (PyArray_
TYPE(
%(a_ind)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_ptr)
s)->type_num
!= NPY_INT32)
if (PyArray_
TYPE(
%(a_ptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_nrows)
s)->type_num
!= NPY_INT32)
if (PyArray_
TYPE(
%(a_nrows)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32");
%(fail)
s;}
if (PyArray_DIMS(
%(a_val)
s)[0] != PyArray_DIMS(
%(a_ind)
s)[0])
if (PyArray_DIMS(
%(a_val)
s)[0] != PyArray_DIMS(
%(a_ind)
s)[0])
...
@@ -164,13 +164,13 @@ class StructuredDotCSC(gof.Op):
...
@@ -164,13 +164,13 @@ class StructuredDotCSC(gof.Op):
{PyErr_SetString(PyExc_NotImplementedError, "a's number of columns doesn't match b's rows");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "a's number of columns doesn't match b's rows");
%(fail)
s;}
if ((!
%(z)
s)
if ((!
%(z)
s)
|| (PyArray_DIMS(
%(z)
s)[0] != ((npy_int32 *)
%(a_nrows)
s->data
)[0])
|| (PyArray_DIMS(
%(z)
s)[0] != ((npy_int32 *)
PyArray_DATA(
%(a_nrows)
s)
)[0])
|| (PyArray_DIMS(
%(z)
s)[1] != PyArray_DIMS(
%(b)
s)[1])
|| (PyArray_DIMS(
%(z)
s)[1] != PyArray_DIMS(
%(b)
s)[1])
)
)
{
{
{Py_XDECREF(
%(z)
s);}
{Py_XDECREF(
%(z)
s);}
npy_intp dims[] = {0, 0};
npy_intp dims[] = {0, 0};
dims[0] = ((npy_int32 *)
%(a_nrows)
s->data
)[0];
dims[0] = ((npy_int32 *)
PyArray_DATA(
%(a_nrows)
s)
)[0];
dims[1] = PyArray_DIMS(
%(b)
s)[1];
dims[1] = PyArray_DIMS(
%(b)
s)[1];
%(z)
s = (PyArrayObject*) PyArray_SimpleNew(2, dims,
%(typenum_z)
s);
%(z)
s = (PyArrayObject*) PyArray_SimpleNew(2, dims,
%(typenum_z)
s);
}
}
...
@@ -182,19 +182,19 @@ class StructuredDotCSC(gof.Op):
...
@@ -182,19 +182,19 @@ class StructuredDotCSC(gof.Op):
npy_intp K = PyArray_DIMS(
%(b)
s)[0];
npy_intp K = PyArray_DIMS(
%(b)
s)[0];
// strides tell you how many bytes to skip to go to next column/row entry
// strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm =
%(z)
s->strides
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Szm =
PyArray_STRIDES(
%(z)
s)
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Szn =
%(z)
s->strides
[1] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Szn =
PyArray_STRIDES(
%(z)
s)
[1] / PyArray_DESCR(
%(z)
s)->elsize;
//npy_intp Sbm =
%(b)
s->strides
[0] / PyArray_DESCR(
%(b)
s)->elsize;
//npy_intp Sbm =
PyArray_STRIDES(
%(b)
s)
[0] / PyArray_DESCR(
%(b)
s)->elsize;
npy_intp Sbn =
%(b)
s->strides
[1] / PyArray_DESCR(
%(b)
s)->elsize;
npy_intp Sbn =
PyArray_STRIDES(
%(b)
s)
[1] / PyArray_DESCR(
%(b)
s)->elsize;
npy_intp Sval =
%(a_val)
s->strides
[0] / PyArray_DESCR(
%(a_val)
s)->elsize;
npy_intp Sval =
PyArray_STRIDES(
%(a_val)
s)
[0] / PyArray_DESCR(
%(a_val)
s)->elsize;
npy_intp Sind =
%(a_ind)
s->strides
[0] / PyArray_DESCR(
%(a_ind)
s)->elsize;
npy_intp Sind =
PyArray_STRIDES(
%(a_ind)
s)
[0] / PyArray_DESCR(
%(a_ind)
s)->elsize;
npy_intp Sptr =
%(a_ptr)
s->strides
[0] / PyArray_DESCR(
%(a_ptr)
s)->elsize;
npy_intp Sptr =
PyArray_STRIDES(
%(a_ptr)
s)
[0] / PyArray_DESCR(
%(a_ptr)
s)->elsize;
// pointers to access actual data in the arrays passed as params.
// pointers to access actual data in the arrays passed as params.
dtype_
%(z)
s* __restrict__ Dz = (dtype_
%(z)
s*)
%(z)
s->data
;
dtype_
%(z)
s* __restrict__ Dz = (dtype_
%(z)
s*)
PyArray_DATA(
%(z)
s)
;
const dtype_
%(a_val)
s* __restrict__ Dval = (dtype_
%(a_val)
s*)
%(a_val)
s->data
;
const dtype_
%(a_val)
s* __restrict__ Dval = (dtype_
%(a_val)
s*)
PyArray_DATA(
%(a_val)
s)
;
const npy_int32 * __restrict__ Dind = (npy_int32*)
%(a_ind)
s->data
;
const npy_int32 * __restrict__ Dind = (npy_int32*)
PyArray_DATA(
%(a_ind)
s)
;
const npy_int32 * __restrict__ Dptr = (npy_int32*)
%(a_ptr)
s->data
;
const npy_int32 * __restrict__ Dptr = (npy_int32*)
PyArray_DATA(
%(a_ptr)
s)
;
//npy_intp nnz = PyArray_DIMS(
%(a_ind)
s)[0];
//npy_intp nnz = PyArray_DIMS(
%(a_ind)
s)[0];
...
@@ -218,7 +218,7 @@ class StructuredDotCSC(gof.Op):
...
@@ -218,7 +218,7 @@ class StructuredDotCSC(gof.Op):
for (npy_int32 k = 0; k < K; ++k)
for (npy_int32 k = 0; k < K; ++k)
{
{
// get pointer to k-th row of dense matrix
// get pointer to k-th row of dense matrix
const dtype_
%(b)
s* __restrict__ bk = (dtype_
%(b)
s*)(
%(b)
s->data +
%(b)
s->strides
[0] * k);
const dtype_
%(b)
s* __restrict__ bk = (dtype_
%(b)
s*)(
PyArray_BYTES(
%(b)
s) + PyArray_STRIDES(
%(b)
s)
[0] * k);
// loop over sparse column indices through index pointer array
// loop over sparse column indices through index pointer array
// (amounts to looping over rows M of sparse matrix)
// (amounts to looping over rows M of sparse matrix)
...
@@ -229,7 +229,7 @@ class StructuredDotCSC(gof.Op):
...
@@ -229,7 +229,7 @@ class StructuredDotCSC(gof.Op):
const dtype_
%(a_val)
s Amk = Dval[m_idx * Sval]; // actual value at that location
const dtype_
%(a_val)
s Amk = Dval[m_idx * Sval]; // actual value at that location
// pointer to m-th row of the output matrix Z
// pointer to m-th row of the output matrix Z
dtype_
%(z)
s* __restrict__ zm = (dtype_
%(z)
s*)(
%(z)
s->data +
%(z)
s->strides
[0] * m);
dtype_
%(z)
s* __restrict__ zm = (dtype_
%(z)
s*)(
PyArray_BYTES(
%(z)
s) + PyArray_STRIDES(
%(z)
s)
[0] * m);
//RESOLVE: a.shape[0] equals z.shape[0], why is this not an equality constraint?
//RESOLVE: a.shape[0] equals z.shape[0], why is this not an equality constraint?
if (m >= PyArray_DIMS(
%(z)
s)[0])
if (m >= PyArray_DIMS(
%(z)
s)[0])
...
@@ -330,10 +330,10 @@ class StructuredDotCSR(gof.Op):
...
@@ -330,10 +330,10 @@ class StructuredDotCSR(gof.Op):
if (PyArray_NDIM(
%(a_ptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(a_ptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(b)
s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2");
%(fail)
s;}
if (PyArray_NDIM(
%(b)
s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_ind)
s)->type_num
!= NPY_INT32) {
if (PyArray_
TYPE(
%(a_ind)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_ptr)
s)->type_num
!= NPY_INT32)
if (PyArray_
TYPE(
%(a_ptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32");
%(fail)
s;}
if (PyArray_DIMS(
%(a_val)
s)[0] != PyArray_DIMS(
%(a_ind)
s)[0])
if (PyArray_DIMS(
%(a_val)
s)[0] != PyArray_DIMS(
%(a_ind)
s)[0])
...
@@ -358,19 +358,19 @@ class StructuredDotCSR(gof.Op):
...
@@ -358,19 +358,19 @@ class StructuredDotCSR(gof.Op):
npy_intp K = PyArray_DIMS(
%(b)
s)[0];
npy_intp K = PyArray_DIMS(
%(b)
s)[0];
// strides tell you how many bytes to skip to go to next column/row entry
// strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm =
%(z)
s->strides
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Szm =
PyArray_STRIDES(
%(z)
s)
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Szn =
%(z)
s->strides
[1] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Szn =
PyArray_STRIDES(
%(z)
s)
[1] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Sbm =
%(b)
s->strides
[0] / PyArray_DESCR(
%(b)
s)->elsize;
npy_intp Sbm =
PyArray_STRIDES(
%(b)
s)
[0] / PyArray_DESCR(
%(b)
s)->elsize;
npy_intp Sbn =
%(b)
s->strides
[1] / PyArray_DESCR(
%(b)
s)->elsize;
npy_intp Sbn =
PyArray_STRIDES(
%(b)
s)
[1] / PyArray_DESCR(
%(b)
s)->elsize;
npy_intp Sval =
%(a_val)
s->strides
[0] / PyArray_DESCR(
%(a_val)
s)->elsize;
npy_intp Sval =
PyArray_STRIDES(
%(a_val)
s)
[0] / PyArray_DESCR(
%(a_val)
s)->elsize;
npy_intp Sind =
%(a_ind)
s->strides
[0] / PyArray_DESCR(
%(a_ind)
s)->elsize;
npy_intp Sind =
PyArray_STRIDES(
%(a_ind)
s)
[0] / PyArray_DESCR(
%(a_ind)
s)->elsize;
npy_intp Sptr =
%(a_ptr)
s->strides
[0] / PyArray_DESCR(
%(a_ptr)
s)->elsize;
npy_intp Sptr =
PyArray_STRIDES(
%(a_ptr)
s)
[0] / PyArray_DESCR(
%(a_ptr)
s)->elsize;
// pointers to access actual data in the arrays passed as params.
// pointers to access actual data in the arrays passed as params.
dtype_
%(z)
s* __restrict__ Dz = (dtype_
%(z)
s*)
%(z)
s->data
;
dtype_
%(z)
s* __restrict__ Dz = (dtype_
%(z)
s*)
PyArray_DATA(
%(z)
s)
;
const dtype_
%(a_val)
s* __restrict__ Dval = (dtype_
%(a_val)
s*)
%(a_val)
s->data
;
const dtype_
%(a_val)
s* __restrict__ Dval = (dtype_
%(a_val)
s*)
PyArray_DATA(
%(a_val)
s)
;
const npy_int32 * __restrict__ Dind = (npy_int32*)
%(a_ind)
s->data
;
const npy_int32 * __restrict__ Dind = (npy_int32*)
PyArray_DATA(
%(a_ind)
s)
;
const npy_int32 * __restrict__ Dptr = (npy_int32*)
%(a_ptr)
s->data
;
const npy_int32 * __restrict__ Dptr = (npy_int32*)
PyArray_DATA(
%(a_ptr)
s)
;
//npy_intp nnz = PyArray_DIMS(
%(a_ind)
s)[0];
//npy_intp nnz = PyArray_DIMS(
%(a_ind)
s)[0];
...
@@ -393,7 +393,7 @@ class StructuredDotCSR(gof.Op):
...
@@ -393,7 +393,7 @@ class StructuredDotCSR(gof.Op):
for (npy_int64 m = 0; m < M; ++m)
for (npy_int64 m = 0; m < M; ++m)
{
{
// pointer to m-th row of the output matrix Z
// pointer to m-th row of the output matrix Z
dtype_
%(z)
s* __restrict__ zm = (dtype_
%(z)
s*)(
%(z)
s->data +
%(z)
s->strides
[0] * m);
dtype_
%(z)
s* __restrict__ zm = (dtype_
%(z)
s*)(
PyArray_BYTES(
%(z)
s) + PyArray_STRIDES(
%(z)
s)
[0] * m);
// loop over sparse rows indices through index pointer array
// loop over sparse rows indices through index pointer array
// (amounts to looping over cols k of sparse matrix)
// (amounts to looping over cols k of sparse matrix)
...
@@ -403,7 +403,7 @@ class StructuredDotCSR(gof.Op):
...
@@ -403,7 +403,7 @@ class StructuredDotCSR(gof.Op):
const dtype_
%(a_val)
s Amk = Dval[k_idx * Sval]; // actual value at that location
const dtype_
%(a_val)
s Amk = Dval[k_idx * Sval]; // actual value at that location
// get pointer to k-th row of dense matrix
// get pointer to k-th row of dense matrix
const dtype_
%(b)
s* __restrict__ bk = (dtype_
%(b)
s*)(
%(b)
s->data +
%(b)
s->strides
[0] * k);
const dtype_
%(b)
s* __restrict__ bk = (dtype_
%(b)
s*)(
PyArray_BYTES(
%(b)
s) + PyArray_STRIDES(
%(b)
s)
[0] * k);
// loop over final dimension (cols of dense matrix) and perform dot product
// loop over final dimension (cols of dense matrix) and perform dot product
for(npy_int32 n = 0; n < N; ++n)
for(npy_int32 n = 0; n < N; ++n)
...
@@ -566,25 +566,25 @@ class UsmmCscDense(gof.Op):
...
@@ -566,25 +566,25 @@ class UsmmCscDense(gof.Op):
if (PyArray_NDIM(
%(x_nrows)
s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0");
%(fail)
s;}
if (PyArray_NDIM(
%(x_nrows)
s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0");
%(fail)
s;}
if (PyArray_NDIM(
%(y)
s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2");
%(fail)
s;}
if (PyArray_NDIM(
%(y)
s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2");
%(fail)
s;}
if (PyArray_
DESCR(
%(x_val)
s)->type_num
!=
%(typenum_x_val)
s) {
if (PyArray_
TYPE(
%(x_val)
s)
!=
%(typenum_x_val)
s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x_val");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x_val");
%(fail)
s;}
if (PyArray_
DESCR(
%(y)
s)->type_num
!=
%(typenum_y)
s) {
if (PyArray_
TYPE(
%(y)
s)
!=
%(typenum_y)
s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y");
%(fail)
s;}
if (PyArray_
DESCR(
%(z)
s)->type_num
!=
%(typenum_z)
s) {
if (PyArray_
TYPE(
%(z)
s)
!=
%(typenum_z)
s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for z");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for z");
%(fail)
s;}
if (PyArray_
DESCR(
%(alpha)
s)->type_num
!=
%(typenum_alpha)
s) {
if (PyArray_
TYPE(
%(alpha)
s)
!=
%(typenum_alpha)
s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for alpha");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for alpha");
%(fail)
s;}
if (PyArray_
DESCR(
%(x_ind)
s)->type_num
!= NPY_INT32) {
if (PyArray_
TYPE(
%(x_ind)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "x_ind dtype not INT32");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "x_ind dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(x_ptr)
s)->type_num
!= NPY_INT32)
if (PyArray_
TYPE(
%(x_ptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "x_ptr dtype not INT32");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "x_ptr dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(x_nrows)
s)->type_num
!= NPY_INT32)
if (PyArray_
TYPE(
%(x_nrows)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "x_nrows dtype not INT32");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "x_nrows dtype not INT32");
%(fail)
s;}
if (PyArray_DIMS(
%(x_val)
s)[0] != PyArray_DIMS(
%(x_ind)
s)[0])
if (PyArray_DIMS(
%(x_val)
s)[0] != PyArray_DIMS(
%(x_ind)
s)[0])
...
@@ -593,7 +593,7 @@ class UsmmCscDense(gof.Op):
...
@@ -593,7 +593,7 @@ class UsmmCscDense(gof.Op):
if (PyArray_DIMS(
%(x_ptr)
s)[0] != PyArray_DIMS(
%(y)
s)[0]+1)
if (PyArray_DIMS(
%(x_ptr)
s)[0] != PyArray_DIMS(
%(y)
s)[0]+1)
{PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows");
%(fail)
s;}
if (PyArray_DIMS(
%(z)
s)[0] != ((npy_int32 *)
%(x_nrows)
s->data
)[0] || PyArray_DIMS(
%(z)
s)[1] != PyArray_DIMS(
%(y)
s)[1])
if (PyArray_DIMS(
%(z)
s)[0] != ((npy_int32 *)
PyArray_DATA(
%(x_nrows)
s)
)[0] || PyArray_DIMS(
%(z)
s)[1] != PyArray_DIMS(
%(y)
s)[1])
{PyErr_SetString(PyExc_NotImplementedError, "The dimension of the allocated output doesn't match the correct output size.");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "The dimension of the allocated output doesn't match the correct output size.");
%(fail)
s;}
if (PyArray_SIZE(
%(alpha)
s) != 1)
if (PyArray_SIZE(
%(alpha)
s) != 1)
...
@@ -621,13 +621,13 @@ class UsmmCscDense(gof.Op):
...
@@ -621,13 +621,13 @@ class UsmmCscDense(gof.Op):
Py_INCREF(
%(zn)
s);
Py_INCREF(
%(zn)
s);
}
}
else if (!
%(zn)
s
else if (!
%(zn)
s
|| (PyArray_DIMS(
%(zn)
s)[0] != ((npy_int32 *)
%(x_nrows)
s->data
)[0])
|| (PyArray_DIMS(
%(zn)
s)[0] != ((npy_int32 *)
PyArray_DATA(
%(x_nrows)
s)
)[0])
|| (PyArray_DIMS(
%(zn)
s)[1] != PyArray_DIMS(
%(y)
s)[1])
|| (PyArray_DIMS(
%(zn)
s)[1] != PyArray_DIMS(
%(y)
s)[1])
)
)
{
{
{Py_XDECREF(
%(zn)
s);}
{Py_XDECREF(
%(zn)
s);}
npy_intp dims[] = {0, 0};
npy_intp dims[] = {0, 0};
dims[0] = ((npy_int32 *)
%(x_nrows)
s->data
)[0];
dims[0] = ((npy_int32 *)
PyArray_DATA(
%(x_nrows)
s)
)[0];
dims[1] = PyArray_DIMS(
%(y)
s)[1];
dims[1] = PyArray_DIMS(
%(y)
s)[1];
%(zn)
s = (PyArrayObject*) PyArray_SimpleNew(2, dims,
%(typenum_zn)
s);
%(zn)
s = (PyArrayObject*) PyArray_SimpleNew(2, dims,
%(typenum_zn)
s);
}
}
...
@@ -639,17 +639,17 @@ class UsmmCscDense(gof.Op):
...
@@ -639,17 +639,17 @@ class UsmmCscDense(gof.Op):
npy_intp K = PyArray_DIMS(
%(y)
s)[0];
npy_intp K = PyArray_DIMS(
%(y)
s)[0];
// pointers to access actual data in the arrays passed as params.
// pointers to access actual data in the arrays passed as params.
const dtype_
%(x_val)
s* __restrict__ Dval = (dtype_
%(x_val)
s*)
%(x_val)
s->data
;
const dtype_
%(x_val)
s* __restrict__ Dval = (dtype_
%(x_val)
s*)
PyArray_DATA(
%(x_val)
s)
;
const npy_int32 * __restrict__ Dind = (npy_int32*)
%(x_ind)
s->data
;
const npy_int32 * __restrict__ Dind = (npy_int32*)
PyArray_DATA(
%(x_ind)
s)
;
const npy_int32 * __restrict__ Dptr = (npy_int32*)
%(x_ptr)
s->data
;
const npy_int32 * __restrict__ Dptr = (npy_int32*)
PyArray_DATA(
%(x_ptr)
s)
;
const dtype_
%(alpha)
s alpha = ((dtype_
%(alpha)
s*)
%(alpha)
s->data
)[0];
const dtype_
%(alpha)
s alpha = ((dtype_
%(alpha)
s*)
PyArray_DATA(
%(alpha)
s)
)[0];
npy_intp Sz =
%(z)
s->strides
[1] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Sz =
PyArray_STRIDES(
%(z)
s)
[1] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Szn =
%(zn)
s->strides
[1] / PyArray_DESCR(
%(zn)
s)->elsize;
npy_intp Szn =
PyArray_STRIDES(
%(zn)
s)
[1] / PyArray_DESCR(
%(zn)
s)->elsize;
npy_intp Sval =
%(x_val)
s->strides
[0] / PyArray_DESCR(
%(x_val)
s)->elsize;
npy_intp Sval =
PyArray_STRIDES(
%(x_val)
s)
[0] / PyArray_DESCR(
%(x_val)
s)->elsize;
npy_intp Sind =
%(x_ind)
s->strides
[0] / PyArray_DESCR(
%(x_ind)
s)->elsize;
npy_intp Sind =
PyArray_STRIDES(
%(x_ind)
s)
[0] / PyArray_DESCR(
%(x_ind)
s)->elsize;
npy_intp Sptr =
%(x_ptr)
s->strides
[0] / PyArray_DESCR(
%(x_ptr)
s)->elsize;
npy_intp Sptr =
PyArray_STRIDES(
%(x_ptr)
s)
[0] / PyArray_DESCR(
%(x_ptr)
s)->elsize;
npy_intp Sy =
%(y)
s->strides
[1] / PyArray_DESCR(
%(y)
s)->elsize;
npy_intp Sy =
PyArray_STRIDES(
%(y)
s)
[1] / PyArray_DESCR(
%(y)
s)->elsize;
if (!(
%(inplace)
s))
if (!(
%(inplace)
s))
...
@@ -669,14 +669,14 @@ class UsmmCscDense(gof.Op):
...
@@ -669,14 +669,14 @@ class UsmmCscDense(gof.Op):
const dtype_
%(x_val)
s Amk = alpha * Dval[m_idx * Sval]; // actual value at that location
const dtype_
%(x_val)
s Amk = alpha * Dval[m_idx * Sval]; // actual value at that location
dtype_
%(y)
s* y_row = (dtype_
%(y)
s*)(
%(y)
s->data +
%(y)
s->strides
[0] * k);
dtype_
%(y)
s* y_row = (dtype_
%(y)
s*)(
PyArray_BYTES(
%(y)
s) + PyArray_STRIDES(
%(y)
s)
[0] * k);
// axpy expects pointer to the beginning of memory arrays,
// axpy expects pointer to the beginning of memory arrays,
// so when the stride is negative, we need to get the
// so when the stride is negative, we need to get the
// last element
// last element
if (Sy < 0)
if (Sy < 0)
y_row += (K - 1) * Sy;
y_row += (K - 1) * Sy;
dtype_
%(zn)
s* z_row = (dtype_
%(zn)
s*)(
%(zn)
s->data +
%(zn)
s->strides
[0] * m);
dtype_
%(zn)
s* z_row = (dtype_
%(zn)
s*)(
PyArray_BYTES(
%(zn)
s) + PyArray_STRIDES(
%(zn)
s)
[0] * m);
if (Szn < 0)
if (Szn < 0)
z_row += (N - 1) * Szn;
z_row += (N - 1) * Szn;
...
@@ -775,16 +775,16 @@ class CSMGradC(gof.Op):
...
@@ -775,16 +775,16 @@ class CSMGradC(gof.Op):
if (PyArray_NDIM(
%(b_ind)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ind) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(b_ind)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ind) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(b_ptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ptr) != 1");
%(fail)
s;}
if (PyArray_NDIM(
%(b_ptr)
s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ptr) != 1");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_ind)
s)->type_num
!= NPY_INT32) {
if (PyArray_
TYPE(
%(a_ind)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(a_ptr)
s)->type_num
!= NPY_INT32)
if (PyArray_
TYPE(
%(a_ptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(b_ind)
s)->type_num
!= NPY_INT32) {
if (PyArray_
TYPE(
%(b_ind)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "b_ind dtype not INT32");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "b_ind dtype not INT32");
%(fail)
s;}
if (PyArray_
DESCR(
%(b_ptr)
s)->type_num
!= NPY_INT32)
if (PyArray_
TYPE(
%(b_ptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "b_ptr dtype not INT32");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "b_ptr dtype not INT32");
%(fail)
s;}
if (PyArray_DIMS(
%(a_val)
s)[0] != PyArray_DIMS(
%(a_ind)
s)[0])
if (PyArray_DIMS(
%(a_val)
s)[0] != PyArray_DIMS(
%(a_ind)
s)[0])
...
@@ -807,28 +807,28 @@ class CSMGradC(gof.Op):
...
@@ -807,28 +807,28 @@ class CSMGradC(gof.Op):
{
{
// sparse array has size MxK, dense KxN, output MxN
// sparse array has size MxK, dense KxN, output MxN
npy_intp M = PyArray_DIMS(
%(a_ptr)
s)[0] - 1;
npy_intp M = PyArray_DIMS(
%(a_ptr)
s)[0] - 1;
npy_intp a_dim_0 = ((npy_int32 *)
%(a_dim)
s->data
)[0];
npy_intp a_dim_0 = ((npy_int32 *)
PyArray_DATA(
%(a_dim)
s)
)[0];
npy_intp a_dim_1 = ((npy_int32 *)
%(a_dim)
s->data
)[1];
npy_intp a_dim_1 = ((npy_int32 *)
PyArray_DATA(
%(a_dim)
s)
)[1];
npy_intp sp_dim = (M == a_dim_0)?a_dim_1:a_dim_0;
npy_intp sp_dim = (M == a_dim_0)?a_dim_1:a_dim_0;
// strides tell you how many bytes to skip to go to next column/row entry
// strides tell you how many bytes to skip to go to next column/row entry
npy_intp Sz =
%(z)
s->strides
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Sz =
PyArray_STRIDES(
%(z)
s)
[0] / PyArray_DESCR(
%(z)
s)->elsize;
npy_intp Sa_val =
%(a_val)
s->strides
[0] / PyArray_DESCR(
%(a_val)
s)->elsize;
npy_intp Sa_val =
PyArray_STRIDES(
%(a_val)
s)
[0] / PyArray_DESCR(
%(a_val)
s)->elsize;
npy_intp Sa_ind =
%(a_ind)
s->strides
[0] / PyArray_DESCR(
%(a_ind)
s)->elsize;
npy_intp Sa_ind =
PyArray_STRIDES(
%(a_ind)
s)
[0] / PyArray_DESCR(
%(a_ind)
s)->elsize;
npy_intp Sa_ptr =
%(a_ptr)
s->strides
[0] / PyArray_DESCR(
%(a_ptr)
s)->elsize;
npy_intp Sa_ptr =
PyArray_STRIDES(
%(a_ptr)
s)
[0] / PyArray_DESCR(
%(a_ptr)
s)->elsize;
npy_intp Sb_val =
%(b_val)
s->strides
[0] / PyArray_DESCR(
%(b_val)
s)->elsize;
npy_intp Sb_val =
PyArray_STRIDES(
%(b_val)
s)
[0] / PyArray_DESCR(
%(b_val)
s)->elsize;
npy_intp Sb_ind =
%(b_ind)
s->strides
[0] / PyArray_DESCR(
%(b_ind)
s)->elsize;
npy_intp Sb_ind =
PyArray_STRIDES(
%(b_ind)
s)
[0] / PyArray_DESCR(
%(b_ind)
s)->elsize;
npy_intp Sb_ptr =
%(b_ptr)
s->strides
[0] / PyArray_DESCR(
%(b_ptr)
s)->elsize;
npy_intp Sb_ptr =
PyArray_STRIDES(
%(b_ptr)
s)
[0] / PyArray_DESCR(
%(b_ptr)
s)->elsize;
// pointers to access actual data in the arrays passed as params.
// pointers to access actual data in the arrays passed as params.
dtype_
%(z)
s* __restrict__ Dz = (dtype_
%(z)
s*)
%(z)
s->data
;
dtype_
%(z)
s* __restrict__ Dz = (dtype_
%(z)
s*)
PyArray_DATA(
%(z)
s)
;
const dtype_
%(a_val)
s* __restrict__ Da_val = (dtype_
%(a_val)
s*)
%(a_val)
s->data
;
const dtype_
%(a_val)
s* __restrict__ Da_val = (dtype_
%(a_val)
s*)
PyArray_DATA(
%(a_val)
s)
;
const npy_int32 * __restrict__ Da_ind = (npy_int32*)
%(a_ind)
s->data
;
const npy_int32 * __restrict__ Da_ind = (npy_int32*)
PyArray_DATA(
%(a_ind)
s)
;
const npy_int32 * __restrict__ Da_ptr = (npy_int32*)
%(a_ptr)
s->data
;
const npy_int32 * __restrict__ Da_ptr = (npy_int32*)
PyArray_DATA(
%(a_ptr)
s)
;
const dtype_
%(b_val)
s* __restrict__ Db_val = (dtype_
%(b_val)
s*)
%(b_val)
s->data
;
const dtype_
%(b_val)
s* __restrict__ Db_val = (dtype_
%(b_val)
s*)
PyArray_DATA(
%(b_val)
s)
;
const npy_int32 * __restrict__ Db_ind = (npy_int32*)
%(b_ind)
s->data
;
const npy_int32 * __restrict__ Db_ind = (npy_int32*)
PyArray_DATA(
%(b_ind)
s)
;
const npy_int32 * __restrict__ Db_ptr = (npy_int32*)
%(b_ptr)
s->data
;
const npy_int32 * __restrict__ Db_ptr = (npy_int32*)
PyArray_DATA(
%(b_ptr)
s)
;
npy_intp nnz = PyArray_DIMS(
%(a_ind)
s)[0];
npy_intp nnz = PyArray_DIMS(
%(a_ind)
s)[0];
...
@@ -937,10 +937,10 @@ class MulSDCSC(gof.Op):
...
@@ -937,10 +937,10 @@ class MulSDCSC(gof.Op):
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)
s;}
%(fail)
s;}
if( PyArray_
DESCR(
%(_indices)
s)->type_num
!= NPY_INT32) {
if( PyArray_
TYPE(
%(_indices)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indptr)
s)->type_num
!= NPY_INT32)
if( PyArray_
TYPE(
%(_indptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
if (!
%(_zout)
s ||
if (!
%(_zout)
s ||
...
@@ -949,7 +949,7 @@ class MulSDCSC(gof.Op):
...
@@ -949,7 +949,7 @@ class MulSDCSC(gof.Op):
{
{
Py_XDECREF(
%(_zout)
s);
Py_XDECREF(
%(_zout)
s);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(
%(_indices)
s), PyArray_
DESCR(
%(_b)
s)->type_num
);
PyArray_DIMS(
%(_indices)
s), PyArray_
TYPE(
%(_b)
s)
);
if (!
%(_zout)
s)
if (!
%(_zout)
s)
{
{
PyErr_SetString(PyExc_MemoryError,
PyErr_SetString(PyExc_MemoryError,
...
@@ -963,13 +963,13 @@ class MulSDCSC(gof.Op):
...
@@ -963,13 +963,13 @@ class MulSDCSC(gof.Op):
//TODO: error checking with this
//TODO: error checking with this
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
%(_data)
s->data
;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
PyArray_DATA(
%(_data)
s)
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
%(_indptr)
s->data
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
PyArray_DATA(
%(_indptr)
s)
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
%(_indices)
s->data
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
PyArray_DATA(
%(_indices)
s)
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
%(_zout)
s->data
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
PyArray_DATA(
%(_zout)
s)
;
const npy_intp Sb =
%(_b)
s->strides
[0];
const npy_intp Sb =
PyArray_STRIDES(
%(_b)
s)
[0];
// loop over columns
// loop over columns
for (npy_int32 j = 0; j < N; ++j)
for (npy_int32 j = 0; j < N; ++j)
...
@@ -981,7 +981,7 @@ class MulSDCSC(gof.Op):
...
@@ -981,7 +981,7 @@ class MulSDCSC(gof.Op):
npy_int32 i = indices[i_idx];
npy_int32 i = indices[i_idx];
// extract i-th row of dense matrix
// extract i-th row of dense matrix
const dtype_
%(_b)
s* __restrict__ b_row = (dtype_
%(_b)
s*)(
%(_b)
s->data
+ Sb * i);
const dtype_
%(_b)
s* __restrict__ b_row = (dtype_
%(_b)
s*)(
PyArray_BYTES(
%(_b)
s)
+ Sb * i);
// write resulting gradient to sparse output
// write resulting gradient to sparse output
zout[i_idx] = data[i_idx] * b_row[j];
zout[i_idx] = data[i_idx] * b_row[j];
...
@@ -1053,10 +1053,10 @@ class MulSDCSR(gof.Op):
...
@@ -1053,10 +1053,10 @@ class MulSDCSR(gof.Op):
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)
s;}
%(fail)
s;}
if( PyArray_
DESCR(
%(_indices)
s)->type_num
!= NPY_INT32) {
if( PyArray_
TYPE(
%(_indices)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indptr)
s)->type_num
!= NPY_INT32)
if( PyArray_
TYPE(
%(_indptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
if (!
%(_zout)
s ||
if (!
%(_zout)
s ||
...
@@ -1065,7 +1065,7 @@ class MulSDCSR(gof.Op):
...
@@ -1065,7 +1065,7 @@ class MulSDCSR(gof.Op):
{
{
Py_XDECREF(
%(_zout)
s);
Py_XDECREF(
%(_zout)
s);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(
%(_indices)
s), PyArray_
DESCR(
%(_b)
s)->type_num
);
PyArray_DIMS(
%(_indices)
s), PyArray_
TYPE(
%(_b)
s)
);
if (!
%(_zout)
s)
if (!
%(_zout)
s)
{
{
PyErr_SetString(PyExc_MemoryError,
PyErr_SetString(PyExc_MemoryError,
...
@@ -1079,19 +1079,19 @@ class MulSDCSR(gof.Op):
...
@@ -1079,19 +1079,19 @@ class MulSDCSR(gof.Op):
//TODO: error checking with this
//TODO: error checking with this
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
%(_data)
s->data
;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
PyArray_DATA(
%(_data)
s)
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
%(_indptr)
s->data
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
PyArray_DATA(
%(_indptr)
s)
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
%(_indices)
s->data
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
PyArray_DATA(
%(_indices)
s)
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
%(_zout)
s->data
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
PyArray_DATA(
%(_zout)
s)
;
const npy_intp Sb =
%(_b)
s->strides
[0];
const npy_intp Sb =
PyArray_STRIDES(
%(_b)
s)
[0];
// loop over columns
// loop over columns
for (npy_int32 j = 0; j < N; ++j)
for (npy_int32 j = 0; j < N; ++j)
{
{
// extract i-th row of dense matrix
// extract i-th row of dense matrix
const dtype_
%(_b)
s* __restrict__ b_row = (dtype_
%(_b)
s*)(
%(_b)
s->data
+ Sb * j);
const dtype_
%(_b)
s* __restrict__ b_row = (dtype_
%(_b)
s*)(
PyArray_BYTES(
%(_b)
s)
+ Sb * j);
// for each non-null value in the sparse column
// for each non-null value in the sparse column
for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx)
for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx)
...
@@ -1209,10 +1209,10 @@ class MulSVCSR(gof.Op):
...
@@ -1209,10 +1209,10 @@ class MulSVCSR(gof.Op):
%(fail)
s;
%(fail)
s;
}
}
if( PyArray_
DESCR(
%(_indices)
s)->type_num
!= NPY_INT32) {
if( PyArray_
TYPE(
%(_indices)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indptr)
s)->type_num
!= NPY_INT32)
if( PyArray_
TYPE(
%(_indptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
if (!
%(_zout)
s
if (!
%(_zout)
s
...
@@ -1221,7 +1221,7 @@ class MulSVCSR(gof.Op):
...
@@ -1221,7 +1221,7 @@ class MulSVCSR(gof.Op):
{
{
Py_XDECREF(
%(_zout)
s);
Py_XDECREF(
%(_zout)
s);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(
%(_indices)
s), PyArray_
DESCR(
%(_b)
s)->type_num
);
PyArray_DIMS(
%(_indices)
s), PyArray_
TYPE(
%(_b)
s)
);
}
}
{ //makes it compile even though labels jump over variable definitions.
{ //makes it compile even though labels jump over variable definitions.
...
@@ -1229,15 +1229,15 @@ class MulSVCSR(gof.Op):
...
@@ -1229,15 +1229,15 @@ class MulSVCSR(gof.Op):
//TODO: error checking with this
//TODO: error checking with this
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
%(_data)
s->data
;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
PyArray_DATA(
%(_data)
s)
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
%(_indptr)
s->data
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
PyArray_DATA(
%(_indptr)
s)
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
%(_indices)
s->data
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
PyArray_DATA(
%(_indices)
s)
;
const dtype_
%(_b)
s* __restrict__ Db = (dtype_
%(_b)
s*)
%(_b)
s->data
;
const dtype_
%(_b)
s* __restrict__ Db = (dtype_
%(_b)
s*)
PyArray_DATA(
%(_b)
s)
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
%(_zout)
s->data
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
PyArray_DATA(
%(_zout)
s)
;
const npy_intp Sb =
%(_b)
s->strides
[0] / PyArray_DESCR(
%(_b)
s)->elsize;
const npy_intp Sb =
PyArray_STRIDES(
%(_b)
s)
[0] / PyArray_DESCR(
%(_b)
s)->elsize;
// loop over rows
// loop over rows
for (npy_int32 j = 0; j < N; ++j)
for (npy_int32 j = 0; j < N; ++j)
...
@@ -1359,10 +1359,10 @@ class StructuredAddSVCSR(gof.Op):
...
@@ -1359,10 +1359,10 @@ class StructuredAddSVCSR(gof.Op):
%(fail)
s;
%(fail)
s;
}
}
if( PyArray_
DESCR(
%(_indices)
s)->type_num
!= NPY_INT32) {
if( PyArray_
TYPE(
%(_indices)
s)
!= NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "C");
%(fail)
s;}
if( PyArray_
DESCR(
%(_indptr)
s)->type_num
!= NPY_INT32)
if( PyArray_
TYPE(
%(_indptr)
s)
!= NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
{PyErr_SetString(PyExc_NotImplementedError, "D");
%(fail)
s;}
if (!
%(_zout)
s
if (!
%(_zout)
s
...
@@ -1371,7 +1371,7 @@ class StructuredAddSVCSR(gof.Op):
...
@@ -1371,7 +1371,7 @@ class StructuredAddSVCSR(gof.Op):
{
{
Py_XDECREF(
%(_zout)
s);
Py_XDECREF(
%(_zout)
s);
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_zout)
s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(
%(_indices)
s), PyArray_
DESCR(
%(_b)
s)->type_num
);
PyArray_DIMS(
%(_indices)
s), PyArray_
TYPE(
%(_b)
s)
);
if (!
%(_zout)
s)
if (!
%(_zout)
s)
{
{
PyErr_SetString(PyExc_MemoryError,
PyErr_SetString(PyExc_MemoryError,
...
@@ -1385,15 +1385,15 @@ class StructuredAddSVCSR(gof.Op):
...
@@ -1385,15 +1385,15 @@ class StructuredAddSVCSR(gof.Op):
//TODO: error checking with this
//TODO: error checking with this
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const npy_intp N = PyArray_DIMS(
%(_indptr)
s)[0]-1;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
%(_data)
s->data
;
const dtype_
%(_data)
s * const __restrict__ data = (dtype_
%(_data)
s*)
PyArray_DATA(
%(_data)
s)
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
%(_indptr)
s->data
;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)
PyArray_DATA(
%(_indptr)
s)
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
%(_indices)
s->data
;
const npy_int32 * const __restrict__ indices = (npy_int32 *)
PyArray_DATA(
%(_indices)
s)
;
const dtype_
%(_b)
s* __restrict__ Db = (dtype_
%(_b)
s*)
%(_b)
s->data
;
const dtype_
%(_b)
s* __restrict__ Db = (dtype_
%(_b)
s*)
PyArray_DATA(
%(_b)
s)
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
%(_zout)
s->data
;
dtype_
%(_zout)
s * const __restrict__ zout = (dtype_
%(_zout)
s*)
PyArray_DATA(
%(_zout)
s)
;
const npy_intp Sb =
%(_b)
s->strides
[0] / PyArray_DESCR(
%(_b)
s)->elsize;
const npy_intp Sb =
PyArray_STRIDES(
%(_b)
s)
[0] / PyArray_DESCR(
%(_b)
s)->elsize;
// loop over columns
// loop over columns
for (npy_int32 j = 0; j < N; ++j)
for (npy_int32 j = 0; j < N; ++j)
...
@@ -1575,17 +1575,17 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;}
...
@@ -1575,17 +1575,17 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;}
if (PyArray_NDIM(
%(y)
s) != 2) {
if (PyArray_NDIM(
%(y)
s) != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2");
%(fail)
s;}
PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2");
%(fail)
s;}
if (PyArray_
DESCR(
%(x)
s)->type_num
!=
%(typenum_x)
s) {
if (PyArray_
TYPE(
%(x)
s)
!=
%(typenum_x)
s) {
PyErr_SetString(PyExc_NotImplementedError,
PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for x");
"Invalid type for x");
%(fail)
s;}
%(fail)
s;}
if (PyArray_
DESCR(
%(y)
s)->type_num
!=
%(typenum_y)
s) {
if (PyArray_
TYPE(
%(y)
s)
!=
%(typenum_y)
s) {
PyErr_SetString(PyExc_NotImplementedError,
PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for y");
"Invalid type for y");
%(fail)
s;}
%(fail)
s;}
if (PyArray_
DESCR(
%(p_data)
s)->type_num
!=
%(typenum_p)
s) {
if (PyArray_
TYPE(
%(p_data)
s)
!=
%(typenum_p)
s) {
PyErr_SetString(PyExc_NotImplementedError,
PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for pattern");
"Invalid type for pattern");
%(fail)
s;}
%(fail)
s;}
...
@@ -1595,7 +1595,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
...
@@ -1595,7 +1595,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
"x's number of columns doesn't match y's rows! Note: sampling_dot is different from dot because y is assumed to be transposed.");
"x's number of columns doesn't match y's rows! Note: sampling_dot is different from dot because y is assumed to be transposed.");
%(fail)
s;}
%(fail)
s;}
if (PyArray_DIMS(
%(y)
s)[0] != ((npy_int32 *)
%(p_ncols)
s->data
)[0] ||
if (PyArray_DIMS(
%(y)
s)[0] != ((npy_int32 *)
PyArray_DATA(
%(p_ncols)
s)
)[0] ||
PyArray_DIMS(
%(x)
s)[0] != (PyArray_DIMS(
%(p_ptr)
s)[0] - 1))
PyArray_DIMS(
%(x)
s)[0] != (PyArray_DIMS(
%(p_ptr)
s)[0] - 1))
{PyErr_SetString(PyExc_NotImplementedError,
{PyErr_SetString(PyExc_NotImplementedError,
"The dimension of the pattern and the output must match");
%(fail)
s;}
"The dimension of the pattern and the output must match");
%(fail)
s;}
...
@@ -1603,7 +1603,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
...
@@ -1603,7 +1603,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
// Allocate output
// Allocate output
if (!
%(z_data)
s
if (!
%(z_data)
s
|| (PyArray_DIMS(
%(z_data)
s)[0] != PyArray_DIMS(
%(p_data)
s)[0])
|| (PyArray_DIMS(
%(z_data)
s)[0] != PyArray_DIMS(
%(p_data)
s)[0])
|| (PyArray_
DESCR(
%(z_data)
s)->type_num
!=
%(typenum_zd)
s)
|| (PyArray_
TYPE(
%(z_data)
s)
!=
%(typenum_zd)
s)
|| !(PyArray_ISCONTIGUOUS(
%(z_data)
s)))
|| !(PyArray_ISCONTIGUOUS(
%(z_data)
s)))
{
{
{Py_XDECREF(
%(z_data)
s);}
{Py_XDECREF(
%(z_data)
s);}
...
@@ -1614,7 +1614,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
...
@@ -1614,7 +1614,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
}
}
if (!
%(z_ind)
s
if (!
%(z_ind)
s
|| (PyArray_DIMS(
%(z_ind)
s)[0] != PyArray_DIMS(
%(p_ind)
s)[0])
|| (PyArray_DIMS(
%(z_ind)
s)[0] != PyArray_DIMS(
%(p_ind)
s)[0])
|| (PyArray_
DESCR(
%(z_ind)
s)->type_num
!=
%(typenum_zi)
s)
|| (PyArray_
TYPE(
%(z_ind)
s)
!=
%(typenum_zi)
s)
|| !(PyArray_ISCONTIGUOUS(
%(z_ind)
s)))
|| !(PyArray_ISCONTIGUOUS(
%(z_ind)
s)))
{
{
{Py_XDECREF(
%(z_ind)
s);}
{Py_XDECREF(
%(z_ind)
s);}
...
@@ -1625,7 +1625,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
...
@@ -1625,7 +1625,7 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
}
}
if (!
%(z_ptr)
s
if (!
%(z_ptr)
s
|| (PyArray_DIMS(
%(z_ptr)
s)[0] != PyArray_DIMS(
%(p_ptr)
s)[0])
|| (PyArray_DIMS(
%(z_ptr)
s)[0] != PyArray_DIMS(
%(p_ptr)
s)[0])
|| (PyArray_
DESCR(
%(z_ptr)
s)->type_num
!=
%(typenum_zp)
s)
|| (PyArray_
TYPE(
%(z_ptr)
s)
!=
%(typenum_zp)
s)
|| !(PyArray_ISCONTIGUOUS(
%(z_ptr)
s)))
|| !(PyArray_ISCONTIGUOUS(
%(z_ptr)
s)))
{
{
{Py_XDECREF(
%(z_ptr)
s);}
{Py_XDECREF(
%(z_ptr)
s);}
...
@@ -1642,23 +1642,23 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
...
@@ -1642,23 +1642,23 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
npy_intp K = PyArray_DIMS(
%(y)
s)[1];
npy_intp K = PyArray_DIMS(
%(y)
s)[1];
// pointers to access actual data in the arrays passed as params.
// pointers to access actual data in the arrays passed as params.
const dtype_
%(x)
s* __restrict__ Dx = (dtype_
%(x)
s*)
%(x)
s->data
;
const dtype_
%(x)
s* __restrict__ Dx = (dtype_
%(x)
s*)
PyArray_DATA(
%(x)
s)
;
const dtype_
%(y)
s* __restrict__ Dy = (dtype_
%(y)
s*)
%(y)
s->data
;
const dtype_
%(y)
s* __restrict__ Dy = (dtype_
%(y)
s*)
PyArray_DATA(
%(y)
s)
;
const dtype_
%(p_data)
s* __restrict__ Dpd = (dtype_
%(p_data)
s*)
%(p_data)
s->data
;
const dtype_
%(p_data)
s* __restrict__ Dpd = (dtype_
%(p_data)
s*)
PyArray_DATA(
%(p_data)
s)
;
const dtype_
%(p_ind)
s* __restrict__ Dpi = (dtype_
%(p_ind)
s*)
%(p_ind)
s->data
;
const dtype_
%(p_ind)
s* __restrict__ Dpi = (dtype_
%(p_ind)
s*)
PyArray_DATA(
%(p_ind)
s)
;
const dtype_
%(p_ptr)
s* __restrict__ Dpp = (dtype_
%(p_ptr)
s*)
%(p_ptr)
s->data
;
const dtype_
%(p_ptr)
s* __restrict__ Dpp = (dtype_
%(p_ptr)
s*)
PyArray_DATA(
%(p_ptr)
s)
;
dtype_
%(z_data)
s* __restrict__ Dzd = (dtype_
%(z_data)
s*)
%(z_data)
s->data
;
dtype_
%(z_data)
s* __restrict__ Dzd = (dtype_
%(z_data)
s*)
PyArray_DATA(
%(z_data)
s)
;
dtype_
%(z_ind)
s* __restrict__ Dzi = (dtype_
%(z_ind)
s*)
%(z_ind)
s->data
;
dtype_
%(z_ind)
s* __restrict__ Dzi = (dtype_
%(z_ind)
s*)
PyArray_DATA(
%(z_ind)
s)
;
dtype_
%(z_ptr)
s* __restrict__ Dzp = (dtype_
%(z_ptr)
s*)
%(z_ptr)
s->data
;
dtype_
%(z_ptr)
s* __restrict__ Dzp = (dtype_
%(z_ptr)
s*)
PyArray_DATA(
%(z_ptr)
s)
;
const npy_intp Sdx =
%(x)
s->strides
[1]/PyArray_DESCR(
%(x)
s)->elsize;
const npy_intp Sdx =
PyArray_STRIDES(
%(x)
s)
[1]/PyArray_DESCR(
%(x)
s)->elsize;
const npy_intp Sdy =
%(y)
s->strides
[1]/PyArray_DESCR(
%(y)
s)->elsize;
const npy_intp Sdy =
PyArray_STRIDES(
%(y)
s)
[1]/PyArray_DESCR(
%(y)
s)->elsize;
const npy_intp Sdpd =
%(p_data)
s->strides
[0] / PyArray_DESCR(
%(p_data)
s)->elsize;
const npy_intp Sdpd =
PyArray_STRIDES(
%(p_data)
s)
[0] / PyArray_DESCR(
%(p_data)
s)->elsize;
const npy_intp Sdpi =
%(p_ind)
s->strides
[0] / PyArray_DESCR(
%(p_ind)
s)->elsize;
const npy_intp Sdpi =
PyArray_STRIDES(
%(p_ind)
s)
[0] / PyArray_DESCR(
%(p_ind)
s)->elsize;
const npy_intp Sdpp =
%(p_ptr)
s->strides
[0] / PyArray_DESCR(
%(p_ptr)
s)->elsize;
const npy_intp Sdpp =
PyArray_STRIDES(
%(p_ptr)
s)
[0] / PyArray_DESCR(
%(p_ptr)
s)->elsize;
const npy_intp Sdzd =
%(z_data)
s->strides
[0] / PyArray_DESCR(
%(z_data)
s)->elsize;
const npy_intp Sdzd =
PyArray_STRIDES(
%(z_data)
s)
[0] / PyArray_DESCR(
%(z_data)
s)->elsize;
const npy_intp Sdzi =
%(z_ind)
s->strides
[0] / PyArray_DESCR(
%(z_ind)
s)->elsize;
const npy_intp Sdzi =
PyArray_STRIDES(
%(z_ind)
s)
[0] / PyArray_DESCR(
%(z_ind)
s)->elsize;
const npy_intp Sdzp =
%(z_ptr)
s->strides
[0] / PyArray_DESCR(
%(z_ptr)
s)->elsize;
const npy_intp Sdzp =
PyArray_STRIDES(
%(z_ptr)
s)
[0] / PyArray_DESCR(
%(z_ptr)
s)->elsize;
memcpy(Dzi, Dpi, PyArray_DIMS(
%(p_ind)
s)[0]*sizeof(dtype_
%(p_ind)
s));
memcpy(Dzi, Dpi, PyArray_DIMS(
%(p_ind)
s)[0]*sizeof(dtype_
%(p_ind)
s));
memcpy(Dzp, Dpp, PyArray_DIMS(
%(p_ptr)
s)[0]*sizeof(dtype_
%(p_ptr)
s));
memcpy(Dzp, Dpp, PyArray_DIMS(
%(p_ptr)
s)[0]*sizeof(dtype_
%(p_ptr)
s));
...
@@ -1667,9 +1667,9 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
...
@@ -1667,9 +1667,9 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
const npy_int32 n = Dpi[n_idx * Sdpi]; // row index of non-null value for column K
const npy_int32 n = Dpi[n_idx * Sdpi]; // row index of non-null value for column K
const dtype_
%(x)
s* x_row = (dtype_
%(x)
s*)(
%(x)
s->data +
%(x)
s->strides
[0] * m);
const dtype_
%(x)
s* x_row = (dtype_
%(x)
s*)(
PyArray_DATA(
%(x)
s) + PyArray_STRIDES(
%(x)
s)
[0] * m);
const dtype_
%(y)
s* y_col = (dtype_
%(y)
s*)(
%(y)
s->data +
%(y)
s->strides
[0] * n);
const dtype_
%(y)
s* y_col = (dtype_
%(y)
s*)(
PyArray_DATA(
%(y)
s) + PyArray_STRIDES(
%(y)
s)
[0] * n);
Dzd[n_idx * Sdzd] = Dpd[n_idx * Sdpd] *
%(cdot)
s((int*)&K, (const
%(conv_type)
s*)x_row, (int*)&Sdx, (const
%(conv_type)
s*)y_col, (int*)&Sdy);
Dzd[n_idx * Sdzd] = Dpd[n_idx * Sdpd] *
%(cdot)
s((int*)&K, (const
%(conv_type)
s*)x_row, (int*)&Sdx, (const
%(conv_type)
s*)y_col, (int*)&Sdy);
}
}
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论