提交 ef44c58e 作者: Pascal Lamblin

Allocate contiguous output if it is needed.

上级 0f5f6ea1
...@@ -885,7 +885,7 @@ class MulSDCSC(gof.Op): ...@@ -885,7 +885,7 @@ class MulSDCSC(gof.Op):
[tensor.tensor(b.dtype, (False,))]) [tensor.tensor(b.dtype, (False,))])
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (2,)
#def perform(self, node, (a_data, a_indices, a_indptr, b), (out,)): #def perform(self, node, (a_data, a_indices, a_indptr, b), (out,)):
# return NotImplementedError() # return NotImplementedError()
...@@ -918,18 +918,20 @@ class MulSDCSC(gof.Op): ...@@ -918,18 +918,20 @@ class MulSDCSC(gof.Op):
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s) if (!%(_zout)s ||
(PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0]) ||
!(PyArray_ISCONTIGUOUS(%(_zout)s)))
{ {
Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num); PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num);
} if (!%(_zout)s)
if (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0])
{ {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_MemoryError,
"somehow _zout got the wrong size.. and I don't know how to resize it."); "Could not allocate output memory.");
%(fail)s; %(fail)s;
} }
}
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
const npy_intp nnz = PyArray_DIMS(%(_indices)s)[0]; const npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
...@@ -999,7 +1001,7 @@ class MulSDCSR(gof.Op): ...@@ -999,7 +1001,7 @@ class MulSDCSR(gof.Op):
[tensor.tensor(b.dtype, (False,))]) [tensor.tensor(b.dtype, (False,))])
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (2,)
#def perform(self, node, (a_data, a_indices, a_indptr, b), (out,)): #def perform(self, node, (a_data, a_indices, a_indptr, b), (out,)):
# return NotImplemented() # return NotImplemented()
...@@ -1032,18 +1034,20 @@ class MulSDCSR(gof.Op): ...@@ -1032,18 +1034,20 @@ class MulSDCSR(gof.Op):
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s) if (!%(_zout)s ||
(PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0]) ||
!(PyArray_ISCONTIGUOUS(%(_zout)s)))
{ {
Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num); PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num);
} if (!%(_zout)s)
if (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0])
{ {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_MemoryError,
"somehow _zout got the wrong size.. and I don't know how to resize it."); "Could not allocate output memory.");
%(fail)s; %(fail)s;
} }
}
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
const npy_intp nnz = PyArray_DIMS(%(_indices)s)[0]; const npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
...@@ -1302,7 +1306,7 @@ class StructuredAddSVCSR(gof.Op): ...@@ -1302,7 +1306,7 @@ class StructuredAddSVCSR(gof.Op):
[tensor.tensor(b.dtype, (False,))]) [tensor.tensor(b.dtype, (False,))])
def c_code_cache_version(self): def c_code_cache_version(self):
return (1,) return (2,)
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
_data, _indices, _indptr, _b, = inputs _data, _indices, _indptr, _b, = inputs
...@@ -1336,18 +1340,20 @@ class StructuredAddSVCSR(gof.Op): ...@@ -1336,18 +1340,20 @@ class StructuredAddSVCSR(gof.Op):
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32) if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s) if (!%(_zout)s
|| (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0])
|| !(PyArray_ISCONTIGUOUS(%(_zout)s)))
{ {
Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num); PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num);
} if (!%(_zout)s)
if (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0])
{ {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_MemoryError,
"somehow _zout got the wrong size.. and I don't know how to resize it."); "Could not allocate output memory.");
%(fail)s; %(fail)s;
} }
}
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
const npy_intp nnz = PyArray_DIMS(%(_indices)s)[0]; const npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
...@@ -1489,7 +1495,7 @@ class SamplingDotCSR(gof.Op): ...@@ -1489,7 +1495,7 @@ class SamplingDotCSR(gof.Op):
]) ])
def c_code_cache_version(self): def c_code_cache_version(self):
return (1, ) return (2, )
def c_support_code(self): def c_support_code(self):
return blas.blas_header_text() return blas.blas_header_text()
...@@ -1572,7 +1578,9 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1572,7 +1578,9 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
// Allocate output // Allocate output
if (!%(z_data)s if (!%(z_data)s
|| (PyArray_DIMS(%(z_data)s)[0] != PyArray_DIMS(%(p_data)s)[0]) || (PyArray_DIMS(%(z_data)s)[0] != PyArray_DIMS(%(p_data)s)[0])
|| (PyArray_DESCR(%(z_data)s)->type_num != %(typenum_zd)s)) { || (PyArray_DESCR(%(z_data)s)->type_num != %(typenum_zd)s)
|| !(PyArray_ISCONTIGUOUS(%(z_data)s)))
{
{Py_XDECREF(%(z_data)s);} {Py_XDECREF(%(z_data)s);}
npy_intp dims[] = {0}; npy_intp dims[] = {0};
dims[0] = PyArray_DIMS(%(p_data)s)[0]; dims[0] = PyArray_DIMS(%(p_data)s)[0];
...@@ -1581,7 +1589,9 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1581,7 +1589,9 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
} }
if (!%(z_ind)s if (!%(z_ind)s
|| (PyArray_DIMS(%(z_ind)s)[0] != PyArray_DIMS(%(p_ind)s)[0]) || (PyArray_DIMS(%(z_ind)s)[0] != PyArray_DIMS(%(p_ind)s)[0])
|| (PyArray_DESCR(%(z_ind)s)->type_num != %(typenum_zi)s)) { || (PyArray_DESCR(%(z_ind)s)->type_num != %(typenum_zi)s)
|| !(PyArray_ISCONTIGUOUS(%(z_ind)s)))
{
{Py_XDECREF(%(z_ind)s);} {Py_XDECREF(%(z_ind)s);}
npy_intp dims[] = {0}; npy_intp dims[] = {0};
dims[0] = PyArray_DIMS(%(p_ind)s)[0]; dims[0] = PyArray_DIMS(%(p_ind)s)[0];
...@@ -1590,7 +1600,9 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1590,7 +1600,9 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
} }
if (!%(z_ptr)s if (!%(z_ptr)s
|| (PyArray_DIMS(%(z_ptr)s)[0] != PyArray_DIMS(%(p_ptr)s)[0]) || (PyArray_DIMS(%(z_ptr)s)[0] != PyArray_DIMS(%(p_ptr)s)[0])
|| (PyArray_DESCR(%(z_ptr)s)->type_num != %(typenum_zp)s)) { || (PyArray_DESCR(%(z_ptr)s)->type_num != %(typenum_zp)s)
|| !(PyArray_ISCONTIGUOUS(%(z_ptr)s)))
{
{Py_XDECREF(%(z_ptr)s);} {Py_XDECREF(%(z_ptr)s);}
npy_intp dims[] = {0}; npy_intp dims[] = {0};
dims[0] = PyArray_DIMS(%(p_ptr)s)[0]; dims[0] = PyArray_DIMS(%(p_ptr)s)[0];
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论