提交 0cce9c79 authored 作者: Rebecca N. Palmer's avatar Rebecca N. Palmer

fix some more index overflows in sparse/opt.py

上级 688c897f
......@@ -337,6 +337,8 @@ class StructuredDotCSC(gof.Op):
npy_intp M = PyArray_DIMS(%(z)s)[0];
npy_intp N = PyArray_DIMS(%(z)s)[1];
npy_intp K = PyArray_DIMS(%(b)s)[0];
if (N > 0x7fffffffL)
{PyErr_SetString(PyExc_NotImplementedError, "array too big (overflows int32 index)"); %(fail)s;}
// strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm = PyArray_STRIDES(%(z)s)[0] / PyArray_DESCR(%(z)s)->elsize;
......@@ -414,7 +416,7 @@ class StructuredDotCSC(gof.Op):
return rval
def c_code_cache_version(self):
return (2,)
return (3,)
sd_csc = StructuredDotCSC()
......@@ -530,6 +532,8 @@ class StructuredDotCSR(gof.Op):
npy_intp M = PyArray_DIMS(%(z)s)[0];
npy_intp N = PyArray_DIMS(%(z)s)[1];
npy_intp K = PyArray_DIMS(%(b)s)[0];
if (N > 0x7fffffffL)
{PyErr_SetString(PyExc_NotImplementedError, "array too big (overflows int32 index)"); %(fail)s;}
// strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm = PyArray_STRIDES(%(z)s)[0] / PyArray_DESCR(%(z)s)->elsize;
......@@ -591,7 +595,7 @@ class StructuredDotCSR(gof.Op):
""" % dict(locals(), **sub)
def c_code_cache_version(self):
return (1,)
return (2,)
sd_csr = StructuredDotCSR()
......@@ -833,6 +837,8 @@ class UsmmCscDense(gof.Op):
npy_intp Sy = PyArray_STRIDES(%(y)s)[1] / PyArray_DESCR(%(y)s)->elsize;
// blas expects ints; convert here (rather than just making N etc ints) to avoid potential overflow in the negative-stride correction
if ((N > 0x7fffffffL)||(Sy > 0x7fffffffL)||(Szn > 0x7fffffffL)||(Sy < -0x7fffffffL)||(Szn < -0x7fffffffL))
{PyErr_SetString(PyExc_NotImplementedError, "array too big for BLAS (overflows int32 index)"); %(fail)s;}
int N32 = N;
int Sy32 = Sy;
int Szn32 = Szn;
......@@ -846,7 +852,7 @@ class UsmmCscDense(gof.Op):
}
}
for (npy_int32 k = 0; k < K; ++k)
for (npy_intp k = 0; k < K; ++k)
{
for (npy_int32 m_idx = Dptr[k * Sptr]; m_idx < Dptr[(k+1)*Sptr]; ++m_idx)
{
......@@ -874,7 +880,7 @@ class UsmmCscDense(gof.Op):
return rval
def c_code_cache_version(self):
return (2, blas.blas_header_version())
return (3, blas.blas_header_version())
usmm_csc_dense = UsmmCscDense(inplace=False)
usmm_csc_dense_inplace = UsmmCscDense(inplace=True)
......@@ -1107,7 +1113,7 @@ class MulSDCSC(gof.Op):
[tensor.tensor(b.dtype, (False,))])
def c_code_cache_version(self):
return (2,)
return (3,)
# def perform(self, node, (a_data, a_indices, a_indptr, b), (out,)):
# return NotImplementedError()
......@@ -1170,7 +1176,7 @@ class MulSDCSC(gof.Op):
const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0];
// loop over columns
for (npy_int32 j = 0; j < N; ++j)
for (npy_intp j = 0; j < N; ++j)
{
// for each non-null value in the sparse column
for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx)
......@@ -1233,7 +1239,7 @@ class MulSDCSR(gof.Op):
[tensor.tensor(b.dtype, (False,))])
def c_code_cache_version(self):
return (2,)
return (3,)
# def perform(self, node, (a_data, a_indices, a_indptr, b), (out,)):
# return NotImplemented()
......@@ -1296,7 +1302,7 @@ class MulSDCSR(gof.Op):
const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0];
// loop over columns
for (npy_int32 j = 0; j < N; ++j)
for (npy_intp j = 0; j < N; ++j)
{
// extract i-th row of dense matrix
const dtype_%(_b)s* __restrict__ b_row = (dtype_%(_b)s*)(PyArray_BYTES(%(_b)s) + Sb * j);
......@@ -1401,7 +1407,7 @@ class MulSVCSR(gof.Op):
[tensor.tensor(b.dtype, (False,))])
def c_code_cache_version(self):
return (1,)
return (2,)
def c_code(self, node, name, inputs, outputs, sub):
_data, _indices, _indptr, _b, = inputs
......@@ -1460,7 +1466,7 @@ class MulSVCSR(gof.Op):
const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0] / PyArray_DESCR(%(_b)s)->elsize;
// loop over rows
for (npy_int32 j = 0; j < N; ++j)
for (npy_intp j = 0; j < N; ++j)
{
// for each non-null value in the sparse column
for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx)
......@@ -1559,7 +1565,7 @@ class StructuredAddSVCSR(gof.Op):
[tensor.tensor(b.dtype, (False,))])
def c_code_cache_version(self):
return (2,)
return (3,)
def c_code(self, node, name, inputs, outputs, sub):
_data, _indices, _indptr, _b, = inputs
......@@ -1624,7 +1630,7 @@ class StructuredAddSVCSR(gof.Op):
const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0] / PyArray_DESCR(%(_b)s)->elsize;
// loop over columns
for (npy_int32 j = 0; j < N; ++j)
for (npy_intp j = 0; j < N; ++j)
{
// for each non-null value in the sparse column
for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx)
......@@ -1754,7 +1760,7 @@ class SamplingDotCSR(gof.Op):
])
def c_code_cache_version(self):
return (3, blas.blas_header_version())
return (4, blas.blas_header_version())
def c_support_code(self):
return blas.blas_header_text()
......@@ -1898,11 +1904,13 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
memcpy(Dzp, Dpp, PyArray_DIMS(%(p_ptr)s)[0]*sizeof(dtype_%(p_ptr)s));
// blas expects ints; convert here (rather than just making K etc ints) to avoid potential overflow in the negative-stride correction
if ((K > 0x7fffffffL)||(Sdx > 0x7fffffffL)||(Sdy > 0x7fffffffL)||(Sdx < -0x7fffffffL)||(Sdy < -0x7fffffffL))
{PyErr_SetString(PyExc_NotImplementedError, "array too big for BLAS (overflows int32 index)"); %(fail)s;}
int K32 = K;
int Sdx32 = Sdx;
int Sdy32 = Sdy;
for (npy_int32 m = 0; m < M; ++m) {
for (npy_intp m = 0; m < M; ++m) {
for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
const npy_int32 n = Dpi[n_idx * Sdpi]; // row index of non-null value for column K
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论