fix some more index overflows in sparse/opt.py

0cce9c79 · Rebecca N. Palmer · 688c897f · 0cce9c79
--- a/theano/sparse/opt.py
+++ b/theano/sparse/opt.py
@@ -337,6 +337,8 @@ class StructuredDotCSC(gof.Op):
            npy_intp M = PyArray_DIMS(%(z)s)[0];
            npy_intp N = PyArray_DIMS(%(z)s)[1];
            npy_intp K = PyArray_DIMS(%(b)s)[0];
+            if (N > 0x7fffffffL)
+            {PyErr_SetString(PyExc_NotImplementedError, "array too big (overflows int32 index)"); %(fail)s;}

            // strides tell you how many bytes to skip to go to next column/row entry
            npy_intp Szm = PyArray_STRIDES(%(z)s)[0] / PyArray_DESCR(%(z)s)->elsize;
@@ -414,7 +416,7 @@ class StructuredDotCSC(gof.Op):
        return rval

    def c_code_cache_version(self):
-        return (2,)
+        return (3,)
 sd_csc = StructuredDotCSC()


@@ -530,6 +532,8 @@ class StructuredDotCSR(gof.Op):
            npy_intp M = PyArray_DIMS(%(z)s)[0];
            npy_intp N = PyArray_DIMS(%(z)s)[1];
            npy_intp K = PyArray_DIMS(%(b)s)[0];
+            if (N > 0x7fffffffL)
+            {PyErr_SetString(PyExc_NotImplementedError, "array too big (overflows int32 index)"); %(fail)s;}

            // strides tell you how many bytes to skip to go to next column/row entry
            npy_intp Szm = PyArray_STRIDES(%(z)s)[0] / PyArray_DESCR(%(z)s)->elsize;
@@ -591,7 +595,7 @@ class StructuredDotCSR(gof.Op):
        """ % dict(locals(), **sub)

    def c_code_cache_version(self):
-        return (1,)
+        return (2,)
 sd_csr = StructuredDotCSR()


@@ -833,6 +837,8 @@ class UsmmCscDense(gof.Op):
            npy_intp Sy = PyArray_STRIDES(%(y)s)[1] / PyArray_DESCR(%(y)s)->elsize;

            // blas expects ints; convert here (rather than just making N etc ints) to avoid potential overflow in the negative-stride correction
+            if ((N > 0x7fffffffL)||(Sy > 0x7fffffffL)||(Szn > 0x7fffffffL)||(Sy < -0x7fffffffL)||(Szn < -0x7fffffffL))
+            {PyErr_SetString(PyExc_NotImplementedError, "array too big for BLAS (overflows int32 index)"); %(fail)s;}
            int N32 = N;
            int Sy32 = Sy;
            int Szn32 = Szn;
@@ -846,7 +852,7 @@ class UsmmCscDense(gof.Op):
                }
            }

-            for (npy_int32 k = 0; k < K; ++k)
+            for (npy_intp k = 0; k < K; ++k)
            {
                for (npy_int32 m_idx = Dptr[k * Sptr]; m_idx < Dptr[(k+1)*Sptr]; ++m_idx)
                {
@@ -874,7 +880,7 @@ class UsmmCscDense(gof.Op):
        return rval

    def c_code_cache_version(self):
-        return (2, blas.blas_header_version())
+        return (3, blas.blas_header_version())
 usmm_csc_dense = UsmmCscDense(inplace=False)
 usmm_csc_dense_inplace = UsmmCscDense(inplace=True)

@@ -1107,7 +1113,7 @@ class MulSDCSC(gof.Op):
                               [tensor.tensor(b.dtype, (False,))])

    def c_code_cache_version(self):
-        return (2,)
+        return (3,)

    # def perform(self, node, (a_data, a_indices, a_indptr, b), (out,)):
    #    return NotImplementedError()
@@ -1170,7 +1176,7 @@ class MulSDCSC(gof.Op):
            const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0];

            // loop over columns
-            for (npy_int32 j = 0; j < N; ++j)
+            for (npy_intp j = 0; j < N; ++j)
            {
                // for each non-null value in the sparse column
                for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx)
@@ -1233,7 +1239,7 @@ class MulSDCSR(gof.Op):
                               [tensor.tensor(b.dtype, (False,))])

    def c_code_cache_version(self):
-        return (2,)
+        return (3,)

    # def perform(self, node, (a_data, a_indices, a_indptr, b), (out,)):
    #    return NotImplemented()
@@ -1296,7 +1302,7 @@ class MulSDCSR(gof.Op):
            const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0];

            // loop over columns
-            for (npy_int32 j = 0; j < N; ++j)
+            for (npy_intp j = 0; j < N; ++j)
            {
                // extract i-th row of dense matrix
                const dtype_%(_b)s* __restrict__ b_row = (dtype_%(_b)s*)(PyArray_BYTES(%(_b)s) + Sb * j);
@@ -1401,7 +1407,7 @@ class MulSVCSR(gof.Op):
                               [tensor.tensor(b.dtype, (False,))])

    def c_code_cache_version(self):
-        return (1,)
+        return (2,)

    def c_code(self, node, name, inputs, outputs, sub):
        _data, _indices, _indptr, _b, = inputs
@@ -1460,7 +1466,7 @@ class MulSVCSR(gof.Op):
            const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0] / PyArray_DESCR(%(_b)s)->elsize;

            // loop over rows
-            for (npy_int32 j = 0; j < N; ++j)
+            for (npy_intp j = 0; j < N; ++j)
            {
                // for each non-null value in the sparse column
                for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx)
@@ -1559,7 +1565,7 @@ class StructuredAddSVCSR(gof.Op):
                               [tensor.tensor(b.dtype, (False,))])

    def c_code_cache_version(self):
-        return (2,)
+        return (3,)

    def c_code(self, node, name, inputs, outputs, sub):
        _data, _indices, _indptr, _b, = inputs
@@ -1624,7 +1630,7 @@ class StructuredAddSVCSR(gof.Op):
            const npy_intp Sb = PyArray_STRIDES(%(_b)s)[0] / PyArray_DESCR(%(_b)s)->elsize;

            // loop over columns
-            for (npy_int32 j = 0; j < N; ++j)
+            for (npy_intp j = 0; j < N; ++j)
            {
                // for each non-null value in the sparse column
                for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx)
@@ -1754,7 +1760,7 @@ class SamplingDotCSR(gof.Op):
        ])

    def c_code_cache_version(self):
-        return (3, blas.blas_header_version())
+        return (4, blas.blas_header_version())

    def c_support_code(self):
        return blas.blas_header_text()
@@ -1898,11 +1904,13 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
            memcpy(Dzp, Dpp, PyArray_DIMS(%(p_ptr)s)[0]*sizeof(dtype_%(p_ptr)s));

            // blas expects ints; convert here (rather than just making K etc ints) to avoid potential overflow in the negative-stride correction
+            if ((K > 0x7fffffffL)||(Sdx > 0x7fffffffL)||(Sdy > 0x7fffffffL)||(Sdx < -0x7fffffffL)||(Sdy < -0x7fffffffL))
+            {PyErr_SetString(PyExc_NotImplementedError, "array too big for BLAS (overflows int32 index)"); %(fail)s;}
            int K32 = K;
            int Sdx32 = Sdx;
            int Sdy32 = Sdy;

-            for (npy_int32 m = 0; m < M; ++m) {
+            for (npy_intp m = 0; m < M; ++m) {
                for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
                    const npy_int32 n = Dpi[n_idx * Sdpi]; // row index of non-null value for column K