Commit 007540a5, authored by lamblin

Merge pull request #919 from nouiz/np17

Np17
...@@ -63,7 +63,7 @@ you should check the strides and alignment. ...@@ -63,7 +63,7 @@ you should check the strides and alignment.
return """ return """
Py_XDECREF(%(y)s); Py_XDECREF(%(y)s);
%(y)s = (PyArrayObject*)PyArray_FromArray( %(y)s = (PyArrayObject*)PyArray_FromArray(
%(x)s, 0, NPY_ENSURECOPY); %(x)s, 0, NPY_ARRAY_ENSURECOPY);
if (!%(y)s) %(fail)s; if (!%(y)s) %(fail)s;
dtype_%(y)s * y = (dtype_%(y)s*)%(y)s->data; dtype_%(y)s * y = (dtype_%(y)s*)%(y)s->data;
dtype_%(x)s * x = (dtype_%(x)s*)%(x)s->data; dtype_%(x)s * x = (dtype_%(x)s*)%(x)s->data;
...@@ -147,7 +147,7 @@ the correct size for the output. This is essentially simulating the line ...@@ -147,7 +147,7 @@ the correct size for the output. This is essentially simulating the line
Py_XDECREF(%(y)s); Py_XDECREF(%(y)s);
%(y)s = (PyArrayObject*)PyArray_FromArray( %(y)s = (PyArrayObject*)PyArray_FromArray(
%(x)s, 0, NPY_ENSURECOPY); %(x)s, 0, NPY_ARRAY_ENSURECOPY);
The first line reduces the reference count of the data that y originally The first line reduces the reference count of the data that y originally
pointed to. The second line allocates the new data and makes y point to it. pointed to. The second line allocates the new data and makes y point to it.
......
...@@ -56,30 +56,30 @@ class BROKEN_ON_PURPOSE_Add(gof.Op): ...@@ -56,30 +56,30 @@ class BROKEN_ON_PURPOSE_Add(gof.Op):
a, b = inp a, b = inp
z, = out z, = out
return """ return """
if (%(a)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a) != 1"); %(fail)s;} if (PyArray_NDIM(%(a)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a) != 1"); %(fail)s;}
if (%(b)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 1"); %(fail)s;} if (PyArray_NDIM(%(b)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 1"); %(fail)s;}
if (%(a)s->descr->type_num != PyArray_DOUBLE) if (PyArray_DESCR(%(a)s)->type_num != NPY_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "a dtype not NPY_DOUBLE"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a dtype not NPY_DOUBLE"); %(fail)s;}
if (%(b)s->descr->type_num != PyArray_DOUBLE) if (PyArray_DESCR(%(b)s)->type_num != NPY_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "b's dtype not NPY_DOUBLE"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "b's dtype not NPY_DOUBLE"); %(fail)s;}
if (%(a)s->dimensions[0] != %(b)s->dimensions[0]) if (PyArray_DIMS(%(a)s)[0] != PyArray_DIMS(%(b)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "a and b have different lengths"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a and b have different lengths"); %(fail)s;}
if ((!%(z)s) if ((!%(z)s)
|| (%(z)s->dimensions[0] != %(b)s->dimensions[0]) || (PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(b)s)[0])
) )
{ {
{Py_XDECREF(%(z)s);} {Py_XDECREF(%(z)s);}
npy_intp dims[] = {0}; npy_intp dims[] = {0};
dims[0] = %(b)s->dimensions[0]; dims[0] = PyArray_DIMS(%(b)s)[0];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, %(b)s->descr->type_num); %(z)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, PyArray_DESCR(%(b)s)->type_num);
} }
{ {
for (npy_intp m = 0; m < %(z)s->dimensions[0]; ++m) for (npy_intp m = 0; m < PyArray_DIMS(%(z)s)[0]; ++m)
{ {
((double*)PyArray_GETPTR1(%(z)s, m))[0] ((double*)PyArray_GETPTR1(%(z)s, m))[0]
= 0.5 = 0.5
...@@ -150,13 +150,13 @@ class WeirdBrokenOp(gof.Op): ...@@ -150,13 +150,13 @@ class WeirdBrokenOp(gof.Op):
else: else:
z_code = """ z_code = """
{Py_XDECREF(%(z)s);} {Py_XDECREF(%(z)s);}
%(z)s = (PyArrayObject*) PyArray_SimpleNew(1, %(a)s->dimensions, %(a)s->descr->type_num); %(z)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(a)s), PyArray_DESCR(%(a)s)->type_num);
""" """
prep_vars = """ prep_vars = """
//the output array has size M x N //the output array has size M x N
npy_intp M = %(a)s->dimensions[0]; npy_intp M = PyArray_DIMS(%(a)s)[0];
npy_intp Sa = %(a)s->strides[0] / %(a)s->descr->elsize; npy_intp Sa = %(a)s->strides[0] / PyArray_DESCR(%(a)s)->elsize;
npy_intp Sz = %(z)s->strides[0] / %(z)s->descr->elsize; npy_intp Sz = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize;
npy_double * Da = (npy_double*)%(a)s->data; npy_double * Da = (npy_double*)%(a)s->data;
npy_double * Dz = (npy_double*)%(z)s->data; npy_double * Dz = (npy_double*)%(z)s->data;
...@@ -603,22 +603,22 @@ class BrokenCImplementationAdd(gof.Op): ...@@ -603,22 +603,22 @@ class BrokenCImplementationAdd(gof.Op):
debug = 0 debug = 0
return """ return """
//printf("executing c_code\\n"); //printf("executing c_code\\n");
if (%(a)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(a) != 2"); %(fail)s;} if (PyArray_NDIM(%(a)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(a) != 2"); %(fail)s;}
if (%(b)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;} if (PyArray_NDIM(%(b)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (%(a)s->descr->type_num != PyArray_FLOAT) if (PyArray_DESCR(%(a)s)->type_num != NPY_FLOAT)
{PyErr_SetString(PyExc_NotImplementedError, "a dtype not NPY_FLOAT"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a dtype not NPY_FLOAT"); %(fail)s;}
if (%(b)s->descr->type_num != PyArray_FLOAT) if (PyArray_DESCR(%(b)s)->type_num != NPY_FLOAT)
{PyErr_SetString(PyExc_NotImplementedError, "b's dtype not NPY_FLOAT"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "b's dtype not NPY_FLOAT"); %(fail)s;}
if (%(a)s->dimensions[0] != %(a)s->dimensions[1]) if (PyArray_DIMS(%(a)s)[0] != PyArray_DIMS(%(a)s)[1])
{PyErr_SetString(PyExc_NotImplementedError, "a is not square"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a is not square"); %(fail)s;}
if (%(b)s->dimensions[0] != %(b)s->dimensions[1]) if (PyArray_DIMS(%(b)s)[0] != PyArray_DIMS(%(b)s)[1])
{PyErr_SetString(PyExc_NotImplementedError, "b is not square"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "b is not square"); %(fail)s;}
if (%(a)s->dimensions[0] != %(b)s->dimensions[0]) if (PyArray_DIMS(%(a)s)[0] != PyArray_DIMS(%(b)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "a and b have different dimensions"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a and b have different dimensions"); %(fail)s;}
// We do not check for c_contiguous property here // We do not check for c_contiguous property here
...@@ -626,32 +626,32 @@ class BrokenCImplementationAdd(gof.Op): ...@@ -626,32 +626,32 @@ class BrokenCImplementationAdd(gof.Op):
{ {
if (!%(z)s) if (!%(z)s)
printf("%(z)s is not there, %%p \\n", %(z)s); printf("%(z)s is not there, %%p \\n", %(z)s);
else if (%(z)s->dimensions[0] != %(b)s->dimensions[0]) else if (PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(b)s)[0])
printf("Dimension 0 mismatch for %(z)s and %(b)s\\n"); printf("Dimension 0 mismatch for %(z)s and %(b)s\\n");
else if (%(z)s->dimensions[1] != %(b)s->dimensions[1]) else if (PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(b)s)[1])
printf("Dimension 1 mismatch for %(z)s and %(b)s\\n"); printf("Dimension 1 mismatch for %(z)s and %(b)s\\n");
else else
printf("Reusing %(z)s\\n"); printf("Reusing %(z)s\\n");
} }
if ((!%(z)s) if ((!%(z)s)
|| (%(z)s->dimensions[0] != %(b)s->dimensions[0]) || (PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(b)s)[0])
|| (%(z)s->dimensions[1] != %(b)s->dimensions[1]) || (PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(b)s)[1])
) )
{ {
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
npy_intp dims[] = {0, 0}; npy_intp dims[] = {0, 0};
dims[0] = %(b)s->dimensions[0]; dims[0] = PyArray_DIMS(%(b)s)[0];
dims[1] = %(b)s->dimensions[1]; dims[1] = PyArray_DIMS(%(b)s)[1];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(b)s->descr->type_num); %(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, PyArray_DESCR(%(b)s)->type_num);
} }
// Let us assume that %(z)s is c_contiguous // Let us assume that %(z)s is c_contiguous
{ {
dtype_%(z)s * z = ((dtype_%(z)s*)(PyArray_GETPTR2(%(z)s,0,0))); dtype_%(z)s * z = ((dtype_%(z)s*)(PyArray_GETPTR2(%(z)s,0,0)));
for (int i=0; i<%(b)s->dimensions[0]; i++) for (int i=0; i<PyArray_DIMS(%(b)s)[0]; i++)
{ {
for (int j=0; j<%(b)s->dimensions[1]; j++) for (int j=0; j<PyArray_DIMS(%(b)s)[1]; j++)
{ {
*z = ((float*)PyArray_GETPTR2(%(a)s, i, j))[0] + *z = ((float*)PyArray_GETPTR2(%(a)s, i, j))[0] +
((float*)PyArray_GETPTR2(%(b)s, i, j))[0] ; ((float*)PyArray_GETPTR2(%(b)s, i, j))[0] ;
......
...@@ -1453,6 +1453,22 @@ class GCC_compiler(object): ...@@ -1453,6 +1453,22 @@ class GCC_compiler(object):
@staticmethod @staticmethod
def compile_args(): def compile_args():
cxxflags = [flag for flag in config.gcc.cxxflags.split(' ') if flag] cxxflags = [flag for flag in config.gcc.cxxflags.split(' ') if flag]
#NumPy 1.7 deprecates the old API. Most places have been updated
#to use the new API, but not all of them. Once that is finished, enable
#the following macro to make sure that no new code
#uses the old API.
#cxxflags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")
numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
# numpy 1.7 deprecated the old names of the following macros, but
# the new names did not exist before 1.7, so map them to the old ones there.
if bool(numpy_ver < [1, 7]):
cxxflags.append("-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY")
cxxflags.append("-D NPY_ARRAY_ALIGNED=NPY_ALIGNED")
cxxflags.append("-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE")
cxxflags.append("-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL")
cxxflags.append("-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS")
cxxflags.append("-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS")
return cxxflags return cxxflags
@staticmethod @staticmethod
......
...@@ -288,7 +288,8 @@ class TestComputeTestValue(unittest.TestCase): ...@@ -288,7 +288,8 @@ class TestComputeTestValue(unittest.TestCase):
# The second is a new message in numpy 1.6. # The second is a new message in numpy 1.6.
assert (str(e).startswith("shape mismatch") or assert (str(e).startswith("shape mismatch") or
str(e).startswith("operands could not be broadcast " str(e).startswith("operands could not be broadcast "
"together with shapes")) "together with shapes") or
str(e).startswith("could not broadcast input"))
finally: finally:
theano.config.compute_test_value = orig_compute_test_value theano.config.compute_test_value = orig_compute_test_value
......
...@@ -123,7 +123,7 @@ class GpuDot22Scalar(GpuOp): ...@@ -123,7 +123,7 @@ class GpuDot22Scalar(GpuOp):
fail = sub['fail'] fail = sub['fail']
return """ return """
#define REAL float #define REAL float
float %(name)s_a = (%(a)s->descr->type_num == PyArray_FLOAT) float %(name)s_a = (PyArray_TYPE(%(a)s) == NPY_FLOAT)
? (REAL)(((float*)%(a)s->data)[0]) ? (REAL)(((float*)%(a)s->data)[0])
: (REAL)(((double*)%(a)s->data)[0]); : (REAL)(((double*)%(a)s->data)[0]);
#undef REAL #undef REAL
...@@ -231,11 +231,11 @@ class GpuGemm(GpuOp): ...@@ -231,11 +231,11 @@ class GpuGemm(GpuOp):
print >> sio, """ print >> sio, """
#define REAL float #define REAL float
float %(name)s_a = (%(a)s->descr->type_num == PyArray_FLOAT) float %(name)s_a = (PyArray_TYPE(%(a)s) == NPY_FLOAT)
? (REAL)(((float*)%(a)s->data)[0]) ? (REAL)(((float*)%(a)s->data)[0])
: (REAL)(((double*)%(a)s->data)[0]); : (REAL)(((double*)%(a)s->data)[0]);
float %(name)s_b = (%(b)s->descr->type_num == PyArray_FLOAT) ? float %(name)s_b = (PyArray_TYPE(%(b)s) == NPY_FLOAT) ?
(REAL)(((float*)%(b)s->data)[0]) (REAL)(((float*)%(b)s->data)[0])
: (REAL)(((double*)%(b)s->data)[0]); : (REAL)(((double*)%(b)s->data)[0]);
#undef REAL #undef REAL
......
...@@ -788,7 +788,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){ ...@@ -788,7 +788,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
PyErr_SetString(PyExc_TypeError, "CudaNdarray_TakeFrom: need a ndarray for indices with dtype int32"); PyErr_SetString(PyExc_TypeError, "CudaNdarray_TakeFrom: need a ndarray for indices with dtype int32");
return NULL; return NULL;
} }
if (((PyArrayObject*)indices_obj)->nd != 1) { if (PyArray_NDIM(((PyArrayObject*)indices_obj)) != 1) {
PyErr_SetString(PyExc_TypeError, "CudaNdarray_TakeFrom: need a CudaNdarray of indices with only 1 dimensions"); PyErr_SetString(PyExc_TypeError, "CudaNdarray_TakeFrom: need a CudaNdarray of indices with only 1 dimensions");
return NULL; return NULL;
} }
...@@ -2921,7 +2921,7 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s ...@@ -2921,7 +2921,7 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s
Py_DECREF(broadcastable); Py_DECREF(broadcastable);
return NULL; return NULL;
} }
for (int i = 0; i < data->nd; ++i) for (int i = 0; i < PyArray_NDIM(data); ++i)
{ {
if ((data->dimensions[i] > 1) && PyInt_AsLong(PyTuple_GetItem(broadcastable, Py_ssize_t(i)))) if ((data->dimensions[i] > 1) && PyInt_AsLong(PyTuple_GetItem(broadcastable, Py_ssize_t(i))))
{ {
...@@ -3102,7 +3102,7 @@ cublas_shutdown() ...@@ -3102,7 +3102,7 @@ cublas_shutdown()
int int
CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj) CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj)
{ {
int err = CudaNdarray_alloc_contiguous(self, obj->nd, obj->dimensions); int err = CudaNdarray_alloc_contiguous(self, PyArray_NDIM(obj), obj->dimensions);
if (err) { if (err) {
return err; return err;
} }
......
...@@ -214,7 +214,7 @@ class GpuImages2Neibs(Images2Neibs, GpuOp): ...@@ -214,7 +214,7 @@ class GpuImages2Neibs(Images2Neibs, GpuOp):
%(fail)s; %(fail)s;
} }
if (%(neib_shape)s->dimensions[0] != 2) if (PyArray_DIMS(%(neib_shape)s)[0] != 2)
{ {
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"neib_shape has to contain two elements"); "neib_shape has to contain two elements");
......
...@@ -153,7 +153,7 @@ class CURAND_Base(GpuOp): ...@@ -153,7 +153,7 @@ class CURAND_Base(GpuOp):
%(ndim)s, %(size)s->dimensions[0]); %(ndim)s, %(size)s->dimensions[0]);
%(fail)s %(fail)s
} }
if (%(size)s->descr->type_num != PyArray_INT32) if (PyArray_DESCR(%(size)s)->type_num != NPY_INT32)
{ {
PyErr_SetString(PyExc_ValueError, "size must be int32"); PyErr_SetString(PyExc_ValueError, "size must be int32");
%(fail)s %(fail)s
......
...@@ -55,31 +55,31 @@ class MultinomialFromUniform(Op): ...@@ -55,31 +55,31 @@ class MultinomialFromUniform(Op):
fail = sub['fail'] fail = sub['fail']
return """ return """
if (%(pvals)s->nd != 2) if (PyArray_NDIM(%(pvals)s) != 2)
{ {
PyErr_Format(PyExc_TypeError, "pvals wrong rank"); PyErr_Format(PyExc_TypeError, "pvals wrong rank");
%(fail)s; %(fail)s;
} }
if (%(unis)s->nd != 1) if (PyArray_NDIM(%(unis)s) != 1)
{ {
PyErr_Format(PyExc_TypeError, "unis wrong rank"); PyErr_Format(PyExc_TypeError, "unis wrong rank");
%(fail)s; %(fail)s;
} }
if (%(unis)s->dimensions[0] != %(pvals)s->dimensions[0]) if (PyArray_DIMS(%(unis)s)[0] != PyArray_DIMS(%(pvals)s)[0])
{ {
PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0]"); PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0]");
%(fail)s; %(fail)s;
} }
if ((NULL == %(z)s) if ((NULL == %(z)s)
|| ((%(z)s->dimensions)[0] != (%(pvals)s->dimensions)[0]) || ((PyArray_DIMS(%(z)s))[0] != (PyArray_DIMS(%(pvals)s))[0])
|| ((%(z)s->dimensions)[1] != (%(pvals)s->dimensions)[1]) || ((PyArray_DIMS(%(z)s))[1] != (PyArray_DIMS(%(pvals)s))[1])
) )
{ {
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = (PyArrayObject*) PyArray_ZEROS(2, %(z)s = (PyArrayObject*) PyArray_ZEROS(2,
%(pvals)s->dimensions, PyArray_DIMS(%(pvals)s),
type_num_%(z)s, type_num_%(z)s,
0); 0);
if (!%(z)s) if (!%(z)s)
...@@ -91,8 +91,8 @@ class MultinomialFromUniform(Op): ...@@ -91,8 +91,8 @@ class MultinomialFromUniform(Op):
{ // NESTED SCOPE { // NESTED SCOPE
const int nb_multi = %(pvals)s->dimensions[0]; const int nb_multi = PyArray_DIMS(%(pvals)s)[0];
const int nb_outcomes = %(pvals)s->dimensions[1]; const int nb_outcomes = PyArray_DIMS(%(pvals)s)[1];
// //
// For each multinomial, loop over each possible outcome // For each multinomial, loop over each possible outcome
...@@ -233,12 +233,12 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp): ...@@ -233,12 +233,12 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
fail = sub['fail'] fail = sub['fail']
return """ return """
if (%(pvals)s->nd != 2) if (PyArray_NDIM(%(pvals)s) != 2)
{ {
PyErr_Format(PyExc_TypeError, "pvals wrong rank"); PyErr_Format(PyExc_TypeError, "pvals wrong rank");
%(fail)s; %(fail)s;
} }
if (%(unis)s->nd != 1) if (PyArray_NDIM(%(unis)s) != 1)
{ {
PyErr_Format(PyExc_TypeError, "unis wrong rank"); PyErr_Format(PyExc_TypeError, "unis wrong rank");
%(fail)s; %(fail)s;
......
...@@ -114,28 +114,28 @@ class Images2Neibs(Op): ...@@ -114,28 +114,28 @@ class Images2Neibs(Op):
int grid_c = -1; //number of patch in height int grid_c = -1; //number of patch in height
int grid_d = -1; //number of patch in width int grid_d = -1; //number of patch in width
{ {
if (%(ten4)s->nd != 4) if (PyArray_NDIM(%(ten4)s) != 4)
{ {
PyErr_Format(PyExc_TypeError, "ten4 wrong rank"); PyErr_Format(PyExc_TypeError, "ten4 wrong rank");
%(fail)s; %(fail)s;
} }
if (%(neib_shape)s->nd != 1) if (PyArray_NDIM(%(neib_shape)s) != 1)
{ {
PyErr_Format(PyExc_TypeError, "neib_shape wrong rank"); PyErr_Format(PyExc_TypeError, "neib_shape wrong rank");
%(fail)s; %(fail)s;
} }
if ( (%(neib_shape)s->dimensions)[0] != 2) if ( (PyArray_DIMS(%(neib_shape)s))[0] != 2)
{ {
PyErr_Format(PyExc_TypeError, "neib_shape wrong shape ; has to" PyErr_Format(PyExc_TypeError, "neib_shape wrong shape ; has to"
" contain 2 elements"); " contain 2 elements");
%(fail)s; %(fail)s;
} }
if (%(neib_step)s->nd != 1) if (PyArray_NDIM(%(neib_step)s) != 1)
{ {
PyErr_Format(PyExc_TypeError, "neib_step wrong rank"); PyErr_Format(PyExc_TypeError, "neib_step wrong rank");
%(fail)s; %(fail)s;
} }
if ( (%(neib_step)s->dimensions)[0] != 2) if ( (PyArray_DIMS(%(neib_step)s))[0] != 2)
{ {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"neib_step wrong step ; has to contain 2 elements"); "neib_step wrong step ; has to contain 2 elements");
...@@ -154,33 +154,33 @@ class Images2Neibs(Op): ...@@ -154,33 +154,33 @@ class Images2Neibs(Op):
PyErr_Format(PyExc_TypeError, "Images2Neibs: in mode wrap_centered need patch with odd shapes"); PyErr_Format(PyExc_TypeError, "Images2Neibs: in mode wrap_centered need patch with odd shapes");
%(fail)s; %(fail)s;
} }
if ( (%(ten4)s->dimensions)[2] < c || (%(ten4)s->dimensions)[3] < d) if ( (PyArray_DIMS(%(ten4)s))[2] < c || (PyArray_DIMS(%(ten4)s))[3] < d)
{ {
PyErr_Format(PyExc_TypeError, "Images2Neibs: in wrap_centered mode, don't support image shapes smaller then the patch shapes: neib_shape=(%%ld,%%ld), ten4[2:]=[%%ld,%%ld]", PyErr_Format(PyExc_TypeError, "Images2Neibs: in wrap_centered mode, don't support image shapes smaller then the patch shapes: neib_shape=(%%ld,%%ld), ten4[2:]=[%%ld,%%ld]",
(long int)c, (long int)d, (long int)(%(ten4)s->dimensions[2]), (long int)(%(ten4)s->dimensions[3])); (long int)c, (long int)d, (long int)(PyArray_DIMS(%(ten4)s)[2]), (long int)(PyArray_DIMS(%(ten4)s)[3]));
%(fail)s; %(fail)s;
} }
grid_c = CEIL_INTDIV(((%(ten4)s->dimensions)[2]),step_x); grid_c = CEIL_INTDIV(((PyArray_DIMS(%(ten4)s))[2]),step_x);
grid_d = CEIL_INTDIV(((%(ten4)s->dimensions)[3]),step_y); grid_d = CEIL_INTDIV(((PyArray_DIMS(%(ten4)s))[3]),step_y);
}else if ( "%(mode)s" == "valid") { }else if ( "%(mode)s" == "valid") {
if ( ((%(ten4)s->dimensions)[2] < c) ||( (((%(ten4)s->dimensions)[2]-c) %% step_x)!=0)) if ( ((PyArray_DIMS(%(ten4)s))[2] < c) ||( (((PyArray_DIMS(%(ten4)s))[2]-c) %% step_x)!=0))
{ {
PyErr_Format(PyExc_TypeError, "neib_shape[0]=%%ld, neib_step[0]=%%ld and ten4.shape[2]=%%ld not consistent", PyErr_Format(PyExc_TypeError, "neib_shape[0]=%%ld, neib_step[0]=%%ld and ten4.shape[2]=%%ld not consistent",
(long int)c, (long int)step_x, (long int)(%(ten4)s->dimensions[2])); (long int)c, (long int)step_x, (long int)(PyArray_DIMS(%(ten4)s)[2]));
%(fail)s; %(fail)s;
} }
if ( ((%(ten4)s->dimensions)[3] < d) ||( (((%(ten4)s->dimensions)[3]-d) %% step_y)!=0)) if ( ((PyArray_DIMS(%(ten4)s))[3] < d) ||( (((PyArray_DIMS(%(ten4)s))[3]-d) %% step_y)!=0))
{ {
PyErr_Format(PyExc_TypeError, "neib_shape[1]=%%ld, neib_step[1]=%%ld and ten4.shape[3]=%%ld not consistent", PyErr_Format(PyExc_TypeError, "neib_shape[1]=%%ld, neib_step[1]=%%ld and ten4.shape[3]=%%ld not consistent",
(long int)d, (long int)step_y, (long int)(%(ten4)s->dimensions[3])); (long int)d, (long int)step_y, (long int)(PyArray_DIMS(%(ten4)s)[3]));
%(fail)s; %(fail)s;
} }
grid_c = 1+(((%(ten4)s->dimensions)[2]-c)/step_x); //number of patch in height grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x); //number of patch in height
grid_d = 1+(((%(ten4)s->dimensions)[3]-d)/step_y); //number of patch in width grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y); //number of patch in width
}else if ( "%(mode)s" == "ignore_borders") { }else if ( "%(mode)s" == "ignore_borders") {
grid_c = 1+(((%(ten4)s->dimensions)[2]-c)/step_x); //number of patch in height grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x); //number of patch in height
grid_d = 1+(((%(ten4)s->dimensions)[3]-d)/step_y); //number of patch in width grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y); //number of patch in width
}else{ }else{
PyErr_Format(PyExc_TypeError, "Images2Neibs: unknow mode '%(mode)s'"); PyErr_Format(PyExc_TypeError, "Images2Neibs: unknow mode '%(mode)s'");
%(fail)s; %(fail)s;
...@@ -190,12 +190,12 @@ class Images2Neibs(Op): ...@@ -190,12 +190,12 @@ class Images2Neibs(Op):
const npy_intp z_dim1 = c * d; const npy_intp z_dim1 = c * d;
const npy_intp z_dim0 = grid_c const npy_intp z_dim0 = grid_c
* grid_d * grid_d
* (%(ten4)s->dimensions)[1] * (PyArray_DIMS(%(ten4)s))[1]
* (%(ten4)s->dimensions)[0]; * (PyArray_DIMS(%(ten4)s))[0];
if ((NULL == %(z)s) if ((NULL == %(z)s)
|| ((%(z)s->dimensions)[0] != z_dim0 ) || ((PyArray_DIMS(%(z)s))[0] != z_dim0 )
|| ((%(z)s->dimensions)[1] != z_dim1 ) || ((PyArray_DIMS(%(z)s))[1] != z_dim1 )
) )
{ {
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
...@@ -218,10 +218,10 @@ class Images2Neibs(Op): ...@@ -218,10 +218,10 @@ class Images2Neibs(Op):
{ // NESTED SCOPE { // NESTED SCOPE
const int nb_batch = (%(ten4)s->dimensions)[0]; const int nb_batch = (PyArray_DIMS(%(ten4)s))[0];
const int nb_stack = (%(ten4)s->dimensions)[1]; const int nb_stack = (PyArray_DIMS(%(ten4)s))[1];
const int height = (%(ten4)s->dimensions)[2]; const int height = (PyArray_DIMS(%(ten4)s))[2];
const int width = (%(ten4)s->dimensions)[3]; const int width = (PyArray_DIMS(%(ten4)s))[3];
// (c,d) = neib_shape // (c,d) = neib_shape
const npy_intp c = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0); const npy_intp c = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0);
......
...@@ -220,9 +220,9 @@ class mrg_uniform(mrg_uniform_base): ...@@ -220,9 +220,9 @@ class mrg_uniform(mrg_uniform_base):
rstate, size = inp rstate, size = inp
o_rstate, o_sample = out o_rstate, o_sample = out
if self.inplace: if self.inplace:
o_rstate_requirement = 'NPY_C_CONTIGUOUS|NPY_ALIGNED' o_rstate_requirement = 'NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_ALIGNED'
else: else:
o_rstate_requirement = 'NPY_ENSURECOPY|NPY_C_CONTIGUOUS|NPY_ALIGNED' o_rstate_requirement = 'NPY_ARRAY_ENSURECOPY|NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_ALIGNED'
ndim = self.output_type.ndim ndim = self.output_type.ndim
o_type_num = numpy.asarray(0, dtype=self.output_type.dtype).dtype.num o_type_num = numpy.asarray(0, dtype=self.output_type.dtype).dtype.num
fail = sub['fail'] fail = sub['fail']
...@@ -241,7 +241,7 @@ class mrg_uniform(mrg_uniform_base): ...@@ -241,7 +241,7 @@ class mrg_uniform(mrg_uniform_base):
int n_elements = 1; int n_elements = 1;
int n_streams = 0; int n_streams = 0;
int must_alloc_sample = ((NULL == %(o_sample)s) int must_alloc_sample = ((NULL == %(o_sample)s)
|| (%(o_sample)s->nd != %(ndim)s) || (PyArray_NDIM(%(o_sample)s) != %(ndim)s)
|| !(PyArray_ISCONTIGUOUS(%(o_sample)s))); || !(PyArray_ISCONTIGUOUS(%(o_sample)s)));
%(otype)s * sample_data; %(otype)s * sample_data;
npy_int32 * state_data; npy_int32 * state_data;
...@@ -261,18 +261,18 @@ class mrg_uniform(mrg_uniform_base): ...@@ -261,18 +261,18 @@ class mrg_uniform(mrg_uniform_base):
const npy_int32 MASK2 = 65535; //2^16 - 1 const npy_int32 MASK2 = 65535; //2^16 - 1
const npy_int32 MULT2 = 21069; const npy_int32 MULT2 = 21069;
if (%(size)s->nd != 1) if (PyArray_NDIM(%(size)s) != 1)
{ {
PyErr_SetString(PyExc_ValueError, "size must be vector"); PyErr_SetString(PyExc_ValueError, "size must be vector");
%(fail)s %(fail)s
} }
if (%(size)s->dimensions[0] != %(ndim)s) if (PyArray_DIMS(%(size)s)[0] != %(ndim)s)
{ {
PyErr_Format(PyExc_ValueError, "size must have length %%i (not %%i)", PyErr_Format(PyExc_ValueError, "size must have length %%i (not %%i)",
%(ndim)s, int(%(size)s->dimensions[0])); %(ndim)s, int(PyArray_DIMS(%(size)s)[0]));
%(fail)s %(fail)s
} }
if (%(size)s->descr->type_num != PyArray_INT32) if (PyArray_DESCR(%(size)s)->type_num != NPY_INT32)
{ {
PyErr_SetString(PyExc_ValueError, "size must be int32"); PyErr_SetString(PyExc_ValueError, "size must be int32");
%(fail)s %(fail)s
...@@ -281,7 +281,7 @@ class mrg_uniform(mrg_uniform_base): ...@@ -281,7 +281,7 @@ class mrg_uniform(mrg_uniform_base):
{ {
odims[i] = ((npy_int32*)(%(size)s->data + %(size)s->strides[0] * i))[0]; odims[i] = ((npy_int32*)(%(size)s->data + %(size)s->strides[0] * i))[0];
n_elements *= odims[i]; n_elements *= odims[i];
must_alloc_sample = must_alloc_sample || (%(o_sample)s->dimensions[i] != odims[i]); must_alloc_sample = must_alloc_sample || (PyArray_DIMS(%(o_sample)s)[i] != odims[i]);
//fprintf(stderr, "size %%i %%i\\n", i, (int)odims[i]); //fprintf(stderr, "size %%i %%i\\n", i, (int)odims[i]);
} }
if (must_alloc_sample) if (must_alloc_sample)
...@@ -296,22 +296,22 @@ class mrg_uniform(mrg_uniform_base): ...@@ -296,22 +296,22 @@ class mrg_uniform(mrg_uniform_base):
Py_XDECREF(%(o_rstate)s); Py_XDECREF(%(o_rstate)s);
%(o_rstate)s = (PyArrayObject*)PyArray_FromAny(py_%(rstate)s, NULL, 0, 0, %(o_rstate_requirement)s,NULL); %(o_rstate)s = (PyArrayObject*)PyArray_FromAny(py_%(rstate)s, NULL, 0, 0, %(o_rstate_requirement)s,NULL);
if (%(o_rstate)s->nd != 2) if (PyArray_NDIM(%(o_rstate)s) != 2)
{ {
PyErr_SetString(PyExc_ValueError, "rstate must be matrix"); PyErr_SetString(PyExc_ValueError, "rstate must be matrix");
%(fail)s %(fail)s
} }
if (%(o_rstate)s->dimensions[1] != 6) if (PyArray_DIMS(%(o_rstate)s)[1] != 6)
{ {
PyErr_Format(PyExc_ValueError, "rstate must have 6 columns"); PyErr_Format(PyExc_ValueError, "rstate must have 6 columns");
%(fail)s %(fail)s
} }
if (%(o_rstate)s->descr->type_num != PyArray_INT32) if (PyArray_DESCR(%(o_rstate)s)->type_num != NPY_INT32)
{ {
PyErr_SetString(PyExc_ValueError, "rstate must be int32"); PyErr_SetString(PyExc_ValueError, "rstate must be int32");
%(fail)s %(fail)s
} }
n_streams = %(o_rstate)s->dimensions[0]; n_streams = PyArray_DIMS(%(o_rstate)s)[0];
sample_data = (%(otype)s *) %(o_sample)s->data; sample_data = (%(otype)s *) %(o_sample)s->data;
state_data = (npy_int32 *) %(o_rstate)s->data; state_data = (npy_int32 *) %(o_rstate)s->data;
...@@ -501,20 +501,20 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp): ...@@ -501,20 +501,20 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
int must_alloc_sample = ((NULL == %(o_sample)s) int must_alloc_sample = ((NULL == %(o_sample)s)
|| !CudaNdarray_Check(py_%(o_sample)s) || !CudaNdarray_Check(py_%(o_sample)s)
|| !CudaNdarray_is_c_contiguous(%(o_sample)s) || !CudaNdarray_is_c_contiguous(%(o_sample)s)
|| (%(o_sample)s->nd != %(ndim)s)); || (PyArray_NDIM(%(o_sample)s) != %(ndim)s));
if (%(size)s->nd != 1) if (PyArray_NDIM(%(size)s) != 1)
{ {
PyErr_SetString(PyExc_ValueError, "size must be vector"); PyErr_SetString(PyExc_ValueError, "size must be vector");
%(fail)s %(fail)s
} }
if (%(size)s->dimensions[0] != %(ndim)s) if (PyArray_DIMS(%(size)s)[0] != %(ndim)s)
{ {
PyErr_Format(PyExc_ValueError, "size must have length %%i (not %%i)", PyErr_Format(PyExc_ValueError, "size must have length %%i (not %%i)",
%(ndim)s, %(size)s->dimensions[0]); %(ndim)s, PyArray_DIMS(%(size)s)[0]);
%(fail)s %(fail)s
} }
if (%(size)s->descr->type_num != PyArray_INT32) if (PyArray_DESCR(%(size)s)->type_num != NPY_INT32)
{ {
PyErr_SetString(PyExc_ValueError, "size must be int32"); PyErr_SetString(PyExc_ValueError, "size must be int32");
%(fail)s %(fail)s
...@@ -552,7 +552,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp): ...@@ -552,7 +552,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
%(o_rstate)s = (CudaNdarray*)CudaNdarray_Copy(%(rstate)s); %(o_rstate)s = (CudaNdarray*)CudaNdarray_Copy(%(rstate)s);
} }
if (%(o_rstate)s->nd != 1) if (PyArray_NDIM(%(o_rstate)s) != 1)
{ {
PyErr_SetString(PyExc_ValueError, "rstate must be vector"); PyErr_SetString(PyExc_ValueError, "rstate must be vector");
%(fail)s; %(fail)s;
......
...@@ -3006,38 +3006,38 @@ class StructuredDotGradCSC(gof.Op): ...@@ -3006,38 +3006,38 @@ class StructuredDotGradCSC(gof.Op):
'g_ab') 'g_ab')
return """ return """
if (%(_d)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(d) != 2"); %(fail)s;} if (PyArray_NDIM(%(_d)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(d) != 2"); %(fail)s;}
if (%(_g)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(g) != 2"); %(fail)s;} if (PyArray_NDIM(%(_g)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(g) != 2"); %(fail)s;}
if (%(_indices)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;} if (PyArray_NDIM(%(_indices)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
if (%(_indptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;} if (PyArray_NDIM(%(_indptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}
if( %(_indices)s->descr->type_num != PyArray_INT32) { if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( %(_indptr)s->descr->type_num != PyArray_INT32) if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if( %(_d)s->dimensions[1] != %(_g)s->dimensions[1]) if( PyArray_DIMS(%(_d)s)[1] != PyArray_DIMS(%(_g)s)[1])
{PyErr_SetString(PyExc_NotImplementedError, "d and g have different numbers of columns"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "d and g have different numbers of columns"); %(fail)s;}
if (!%(_zout)s if (!%(_zout)s
|| (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0])) || (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0]))
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_indices)s->dimensions, %(_g)s->descr->type_num); %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_g)s)->type_num);
} }
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
npy_intp nnz = %(_indices)s->dimensions[0]; npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
npy_intp N = %(_indptr)s->dimensions[0]-1; //TODO: error checking with this npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; //TODO: error checking with this
npy_intp Sindices = %(_indices)s->strides[0]/%(_indices)s->descr->elsize; npy_intp Sindices = %(_indices)s->strides[0]/PyArray_DESCR(%(_indices)s)->elsize;
npy_intp Sindptr = %(_indptr)s->strides[0]/%(_indptr)s->descr->elsize; npy_intp Sindptr = %(_indptr)s->strides[0]/PyArray_DESCR(%(_indptr)s)->elsize;
const npy_intp Sd1 = %(_d)s->strides[1]/%(_d)s->descr->elsize; const npy_intp Sd1 = %(_d)s->strides[1]/PyArray_DESCR(%(_d)s)->elsize;
const npy_intp Sg1 = %(_g)s->strides[1]/%(_g)s->descr->elsize; const npy_intp Sg1 = %(_g)s->strides[1]/PyArray_DESCR(%(_g)s)->elsize;
const npy_intp K = %(_d)s->dimensions[1]; const npy_intp K = PyArray_DIMS(%(_d)s)[1];
const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data;
...@@ -3047,7 +3047,7 @@ class StructuredDotGradCSC(gof.Op): ...@@ -3047,7 +3047,7 @@ class StructuredDotGradCSC(gof.Op):
{ {
// extract j-th row of dense matrix // extract j-th row of dense matrix
const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(%(_d)s->data + %(_d)s->strides[0] * j); const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(%(_d)s->data + %(_d)s->strides[0] * j);
if(j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;} if(j >= PyArray_DIMS(%(_d)s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
// for each non-null value in the sparse column // for each non-null value in the sparse column
for (npy_int32 i_idx = indptr[j * Sindptr]; i_idx < indptr[(j+1) * Sindptr]; ++i_idx) for (npy_int32 i_idx = indptr[j * Sindptr]; i_idx < indptr[(j+1) * Sindptr]; ++i_idx)
...@@ -3062,7 +3062,7 @@ class StructuredDotGradCSC(gof.Op): ...@@ -3062,7 +3062,7 @@ class StructuredDotGradCSC(gof.Op):
// make sure that row index is not bigger than actual number of rows // make sure that row index is not bigger than actual number of rows
// Note: wouldn't the above operation fail if that were the case ? // Note: wouldn't the above operation fail if that were the case ?
// when would this ever be true anyway ? // when would this ever be true anyway ?
if (i >= %(_g)s->dimensions[0]) if (i >= PyArray_DIMS(%(_g)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;}
// perform dot product of dense and sparse rows // perform dot product of dense and sparse rows
...@@ -3142,39 +3142,39 @@ class StructuredDotGradCSR(gof.Op): ...@@ -3142,39 +3142,39 @@ class StructuredDotGradCSR(gof.Op):
'g_ab') 'g_ab')
return """ return """
if (%(_d)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(d) != 2"); %(fail)s;} if (PyArray_NDIM(%(_d)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(d) != 2"); %(fail)s;}
if (%(_g)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(g) != 2"); %(fail)s;} if (PyArray_NDIM(%(_g)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(g) != 2"); %(fail)s;}
if (%(_indices)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;} if (PyArray_NDIM(%(_indices)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
if (%(_indptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;} if (PyArray_NDIM(%(_indptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}
if( %(_indices)s->descr->type_num != PyArray_INT32) { if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( %(_indptr)s->descr->type_num != PyArray_INT32) if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if( %(_d)s->dimensions[1] != %(_g)s->dimensions[1]) if( PyArray_DIMS(%(_d)s)[1] != PyArray_DIMS(%(_g)s)[1])
{PyErr_SetString(PyExc_NotImplementedError, "d and g have different numbers of columns"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "d and g have different numbers of columns"); %(fail)s;}
if (!%(_zout)s if (!%(_zout)s
|| (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0])) || (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0]))
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_indices)s->dimensions, %(_g)s->descr->type_num); %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_g)s)->type_num);
} }
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
npy_intp nnz = %(_indices)s->dimensions[0]; npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
// extract number of rows // extract number of rows
npy_intp N = %(_indptr)s->dimensions[0]-1; //TODO: error checking with this npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; //TODO: error checking with this
npy_intp Sindices = %(_indices)s->strides[0]/%(_indices)s->descr->elsize; npy_intp Sindices = %(_indices)s->strides[0]/PyArray_DESCR(%(_indices)s)->elsize;
npy_intp Sindptr = %(_indptr)s->strides[0]/%(_indptr)s->descr->elsize; npy_intp Sindptr = %(_indptr)s->strides[0]/PyArray_DESCR(%(_indptr)s)->elsize;
const npy_intp Sd1 = %(_d)s->strides[1]/%(_d)s->descr->elsize; const npy_intp Sd1 = %(_d)s->strides[1]/PyArray_DESCR(%(_d)s)->elsize;
const npy_intp Sg1 = %(_g)s->strides[1]/%(_g)s->descr->elsize; const npy_intp Sg1 = %(_g)s->strides[1]/PyArray_DESCR(%(_g)s)->elsize;
const npy_intp K = %(_d)s->dimensions[1]; const npy_intp K = PyArray_DIMS(%(_d)s)[1];
const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data;
...@@ -3190,7 +3190,7 @@ class StructuredDotGradCSR(gof.Op): ...@@ -3190,7 +3190,7 @@ class StructuredDotGradCSR(gof.Op):
// extract j-th row of dense matrix // extract j-th row of dense matrix
const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(%(_d)s->data + %(_d)s->strides[0] * j); const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(%(_d)s->data + %(_d)s->strides[0] * j);
if(j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;} if(j >= PyArray_DIMS(%(_d)s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
// extract corresponding row in gradient // extract corresponding row in gradient
const dtype_%(_g)s* __restrict__ g_row = (dtype_%(_g)s*)(%(_g)s->data + %(_g)s->strides[0] * i); const dtype_%(_g)s* __restrict__ g_row = (dtype_%(_g)s*)(%(_g)s->data + %(_g)s->strides[0] * i);
...@@ -3199,7 +3199,7 @@ class StructuredDotGradCSR(gof.Op): ...@@ -3199,7 +3199,7 @@ class StructuredDotGradCSR(gof.Op):
// make sure that row index is not bigger than actual number of rows // make sure that row index is not bigger than actual number of rows
// Note: wouldn't the above operation fail if that were the case ? // Note: wouldn't the above operation fail if that were the case ?
// when would this ever be true anyway ? // when would this ever be true anyway ?
if (i >= %(_g)s->dimensions[0]) if (i >= PyArray_DIMS(%(_g)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;}
// perform dot product of dense and sparse rows // perform dot product of dense and sparse rows
......
...@@ -112,59 +112,59 @@ class StructuredDotCSC(gof.Op): ...@@ -112,59 +112,59 @@ class StructuredDotCSC(gof.Op):
rval = """ rval = """
if (%(a_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;} if (PyArray_NDIM(%(a_val)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;}
if (%(a_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;} if (PyArray_NDIM(%(a_ind)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
if (%(a_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;} if (PyArray_NDIM(%(a_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (%(a_nrows)s->nd != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;} if (PyArray_NDIM(%(a_nrows)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;}
if (%(b)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;} if (PyArray_NDIM(%(b)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (%(a_val)s->descr->type_num != %(typenum_a_val)s) { if (PyArray_DESCR(%(a_val)s)->type_num != %(typenum_a_val)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for a_val"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for a_val"); %(fail)s;}
if (%(b)s->descr->type_num != %(typenum_b)s) { if (PyArray_DESCR(%(b)s)->type_num != %(typenum_b)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for b"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for b"); %(fail)s;}
if (%(a_ind)s->descr->type_num != PyArray_INT32) { if (PyArray_DESCR(%(a_ind)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (%(a_ptr)s->descr->type_num != PyArray_INT32) if (PyArray_DESCR(%(a_ptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (%(a_nrows)s->descr->type_num != PyArray_INT32) if (PyArray_DESCR(%(a_nrows)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32"); %(fail)s;}
if (%(a_val)s->dimensions[0] != %(a_ind)s->dimensions[0]) if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "a_val and a_ind have different lengths"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_val and a_ind have different lengths"); %(fail)s;}
if (%(a_ptr)s->dimensions[0] != %(b)s->dimensions[0]+1) if (PyArray_DIMS(%(a_ptr)s)[0] != PyArray_DIMS(%(b)s)[0]+1)
{PyErr_SetString(PyExc_NotImplementedError, "a's number of columns doesn't match b's rows"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a's number of columns doesn't match b's rows"); %(fail)s;}
if ((!%(z)s) if ((!%(z)s)
|| (%(z)s->dimensions[0] != ((npy_int32 *)%(a_nrows)s->data)[0]) || (PyArray_DIMS(%(z)s)[0] != ((npy_int32 *)%(a_nrows)s->data)[0])
|| (%(z)s->dimensions[1] != %(b)s->dimensions[1]) || (PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(b)s)[1])
) )
{ {
{Py_XDECREF(%(z)s);} {Py_XDECREF(%(z)s);}
npy_intp dims[] = {0, 0}; npy_intp dims[] = {0, 0};
dims[0] = ((npy_int32 *)%(a_nrows)s->data)[0]; dims[0] = ((npy_int32 *)%(a_nrows)s->data)[0];
dims[1] = %(b)s->dimensions[1]; dims[1] = PyArray_DIMS(%(b)s)[1];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s); %(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s);
} }
{ {
// sparse array has size MxK, dense KxN, output MxN // sparse array has size MxK, dense KxN, output MxN
npy_intp M = %(z)s->dimensions[0]; npy_intp M = PyArray_DIMS(%(z)s)[0];
npy_intp N = %(z)s->dimensions[1]; npy_intp N = PyArray_DIMS(%(z)s)[1];
npy_intp K = %(b)s->dimensions[0]; npy_intp K = PyArray_DIMS(%(b)s)[0];
// strides tell you how many bytes to skip to go to next column/row entry // strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm = %(z)s->strides[0] / %(z)s->descr->elsize; npy_intp Szm = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Szn = %(z)s->strides[1] / %(z)s->descr->elsize; npy_intp Szn = %(z)s->strides[1] / PyArray_DESCR(%(z)s)->elsize;
//npy_intp Sbm = %(b)s->strides[0] / %(b)s->descr->elsize; //npy_intp Sbm = %(b)s->strides[0] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sbn = %(b)s->strides[1] / %(b)s->descr->elsize; npy_intp Sbn = %(b)s->strides[1] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sval = %(a_val)s->strides[0] / %(a_val)s->descr->elsize; npy_intp Sval = %(a_val)s->strides[0] / PyArray_DESCR(%(a_val)s)->elsize;
npy_intp Sind = %(a_ind)s->strides[0] / %(a_ind)s->descr->elsize; npy_intp Sind = %(a_ind)s->strides[0] / PyArray_DESCR(%(a_ind)s)->elsize;
npy_intp Sptr = %(a_ptr)s->strides[0] / %(a_ptr)s->descr->elsize; npy_intp Sptr = %(a_ptr)s->strides[0] / PyArray_DESCR(%(a_ptr)s)->elsize;
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data; dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data;
...@@ -172,7 +172,7 @@ class StructuredDotCSC(gof.Op): ...@@ -172,7 +172,7 @@ class StructuredDotCSC(gof.Op):
const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data; const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data;
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data; const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data;
//npy_intp nnz = %(a_ind)s->dimensions[0]; //npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0];
//clear the output array //clear the output array
memset(Dz, 0, M*N*sizeof(dtype_%(z)s)); memset(Dz, 0, M*N*sizeof(dtype_%(z)s));
...@@ -208,7 +208,7 @@ class StructuredDotCSC(gof.Op): ...@@ -208,7 +208,7 @@ class StructuredDotCSC(gof.Op):
dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * m); dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * m);
//RESOLVE: a.shape[0] equals z.shape[0], why is this not an equality constraint? //RESOLVE: a.shape[0] equals z.shape[0], why is this not an equality constraint?
if (m >= %(z)s->dimensions[0]) if (m >= PyArray_DIMS(%(z)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "illegal row index in a"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "illegal row index in a"); %(fail)s;}
// loop over final dimension (cols of dense matrix) and perform dot product // loop over final dimension (cols of dense matrix) and perform dot product
...@@ -301,46 +301,46 @@ class StructuredDotCSR(gof.Op): ...@@ -301,46 +301,46 @@ class StructuredDotCSR(gof.Op):
raise NotImplementedError('Complex types are not supported for b') raise NotImplementedError('Complex types are not supported for b')
return """ return """
if (%(a_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;} if (PyArray_NDIM(%(a_val)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;}
if (%(a_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;} if (PyArray_NDIM(%(a_ind)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
if (%(a_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;} if (PyArray_NDIM(%(a_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (%(b)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;} if (PyArray_NDIM(%(b)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (%(a_ind)s->descr->type_num != PyArray_INT32) { if (PyArray_DESCR(%(a_ind)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (%(a_ptr)s->descr->type_num != PyArray_INT32) if (PyArray_DESCR(%(a_ptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (%(a_val)s->dimensions[0] != %(a_ind)s->dimensions[0]) if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "a_val and a_ind have different lengths"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_val and a_ind have different lengths"); %(fail)s;}
if ((!%(z)s) if ((!%(z)s)
|| (%(z)s->dimensions[0] != %(a_ptr)s->dimensions[0]-1) //a's rows || (PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(a_ptr)s)[0]-1) //a's rows
|| (%(z)s->dimensions[1] != %(b)s->dimensions[1]) //b's columns || (PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(b)s)[1]) //b's columns
) )
{ {
{Py_XDECREF(%(z)s);} {Py_XDECREF(%(z)s);}
npy_intp dims[] = {0, 0}; npy_intp dims[] = {0, 0};
dims[0] = %(a_ptr)s->dimensions[0]-1; dims[0] = PyArray_DIMS(%(a_ptr)s)[0]-1;
dims[1] = %(b)s->dimensions[1]; dims[1] = PyArray_DIMS(%(b)s)[1];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s); %(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s);
} }
{ {
// sparse array has size MxK, dense KxN, output MxN // sparse array has size MxK, dense KxN, output MxN
npy_intp M = %(z)s->dimensions[0]; npy_intp M = PyArray_DIMS(%(z)s)[0];
npy_intp N = %(z)s->dimensions[1]; npy_intp N = PyArray_DIMS(%(z)s)[1];
npy_intp K = %(b)s->dimensions[0]; npy_intp K = PyArray_DIMS(%(b)s)[0];
// strides tell you how many bytes to skip to go to next column/row entry // strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm = %(z)s->strides[0] / %(z)s->descr->elsize; npy_intp Szm = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Szn = %(z)s->strides[1] / %(z)s->descr->elsize; npy_intp Szn = %(z)s->strides[1] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Sbm = %(b)s->strides[0] / %(b)s->descr->elsize; npy_intp Sbm = %(b)s->strides[0] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sbn = %(b)s->strides[1] / %(b)s->descr->elsize; npy_intp Sbn = %(b)s->strides[1] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sval = %(a_val)s->strides[0] / %(a_val)s->descr->elsize; npy_intp Sval = %(a_val)s->strides[0] / PyArray_DESCR(%(a_val)s)->elsize;
npy_intp Sind = %(a_ind)s->strides[0] / %(a_ind)s->descr->elsize; npy_intp Sind = %(a_ind)s->strides[0] / PyArray_DESCR(%(a_ind)s)->elsize;
npy_intp Sptr = %(a_ptr)s->strides[0] / %(a_ptr)s->descr->elsize; npy_intp Sptr = %(a_ptr)s->strides[0] / PyArray_DESCR(%(a_ptr)s)->elsize;
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data; dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data;
...@@ -348,7 +348,7 @@ class StructuredDotCSR(gof.Op): ...@@ -348,7 +348,7 @@ class StructuredDotCSR(gof.Op):
const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data; const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data;
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data; const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data;
//npy_intp nnz = %(a_ind)s->dimensions[0]; //npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0];
//clear the output array //clear the output array
memset(Dz, 0, M*N*sizeof(dtype_%(z)s)); memset(Dz, 0, M*N*sizeof(dtype_%(z)s));
...@@ -535,55 +535,56 @@ class UsmmCscDense(gof.Op): ...@@ -535,55 +535,56 @@ class UsmmCscDense(gof.Op):
inplace = int(self.inplace) inplace = int(self.inplace)
rval = """ rval = """
if (%(x_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_val) != 1"); %(fail)s;}
if (%(x_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_ind) != 1"); %(fail)s;}
if (%(x_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_ptr) != 1"); %(fail)s;}
if (%(x_nrows)s->nd != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_nrows) != 0"); %(fail)s;}
if (%(y)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
if (%(x_val)s->descr->type_num != %(typenum_x_val)s) { if (PyArray_NDIM(%(x_val)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_val) != 1"); %(fail)s;}
if (PyArray_NDIM(%(x_ind)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_ind) != 1"); %(fail)s;}
if (PyArray_NDIM(%(x_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_ptr) != 1"); %(fail)s;}
if (PyArray_NDIM(%(x_nrows)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;}
if (PyArray_NDIM(%(y)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
if (PyArray_DESCR(%(x_val)s)->type_num != %(typenum_x_val)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x_val"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x_val"); %(fail)s;}
if (%(y)s->descr->type_num != %(typenum_y)s) { if (PyArray_DESCR(%(y)s)->type_num != %(typenum_y)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y"); %(fail)s;}
if (%(z)s->descr->type_num != %(typenum_z)s) { if (PyArray_DESCR(%(z)s)->type_num != %(typenum_z)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for z"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for z"); %(fail)s;}
if (%(alpha)s->descr->type_num != %(typenum_alpha)s) { if (PyArray_DESCR(%(alpha)s)->type_num != %(typenum_alpha)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for alpha"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "Invalid type for alpha"); %(fail)s;}
if (%(x_ind)s->descr->type_num != PyArray_INT32) { if (PyArray_DESCR(%(x_ind)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "x_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "x_ind dtype not INT32"); %(fail)s;}
if (%(x_ptr)s->descr->type_num != PyArray_INT32) if (PyArray_DESCR(%(x_ptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "x_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "x_ptr dtype not INT32"); %(fail)s;}
if (%(x_nrows)s->descr->type_num != PyArray_INT32) if (PyArray_DESCR(%(x_nrows)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "x_nrows dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "x_nrows dtype not INT32"); %(fail)s;}
if (%(x_val)s->dimensions[0] != %(x_ind)s->dimensions[0]) if (PyArray_DIMS(%(x_val)s)[0] != PyArray_DIMS(%(x_ind)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "x_val and x_ind have different lengths"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "x_val and x_ind have different lengths"); %(fail)s;}
if (%(x_ptr)s->dimensions[0] != %(y)s->dimensions[0]+1) if (PyArray_DIMS(%(x_ptr)s)[0] != PyArray_DIMS(%(y)s)[0]+1)
{PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows"); %(fail)s;}
if (%(z)s->dimensions[0] != ((npy_int32 *)%(x_nrows)s->data)[0] || %(z)s->dimensions[1] != %(y)s->dimensions[1]) if (PyArray_DIMS(%(z)s)[0] != ((npy_int32 *)%(x_nrows)s->data)[0] || PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(y)s)[1])
{PyErr_SetString(PyExc_NotImplementedError, "The dimension of the allocated output doesn't match the correct output size."); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "The dimension of the allocated output doesn't match the correct output size."); %(fail)s;}
if (PyArray_SIZE(%(alpha)s) != 1) if (PyArray_SIZE(%(alpha)s) != 1)
{PyErr_SetString(PyExc_NotImplementedError, "The number of element in alpha must be 1"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "The number of element in alpha must be 1"); %(fail)s;}
if (%(alpha)s->nd != 2) if (PyArray_NDIM(%(alpha)s) != 2)
{PyErr_SetString(PyExc_NotImplementedError, "The number dimension of alpha must be 2"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "The number dimension of alpha must be 2"); %(fail)s;}
if (%(x_val)s->nd != 1) if (PyArray_NDIM(%(x_val)s) != 1)
{PyErr_SetString(PyExc_NotImplementedError, "The number dimension of x_val must be 1"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "The number dimension of x_val must be 1"); %(fail)s;}
if (%(y)s->nd != 2) if (PyArray_NDIM(%(y)s) != 2)
{PyErr_SetString(PyExc_NotImplementedError, "The number dimension of y must be 2"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "The number dimension of y must be 2"); %(fail)s;}
if (%(z)s->nd != 2) if (PyArray_NDIM(%(z)s) != 2)
{PyErr_SetString(PyExc_NotImplementedError, "The number dimension of z must be 2"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "The number dimension of z must be 2"); %(fail)s;}
if (%(inplace)s) if (%(inplace)s)
...@@ -596,22 +597,22 @@ class UsmmCscDense(gof.Op): ...@@ -596,22 +597,22 @@ class UsmmCscDense(gof.Op):
Py_INCREF(%(zn)s); Py_INCREF(%(zn)s);
} }
else if (!%(zn)s else if (!%(zn)s
|| (%(zn)s->dimensions[0] != ((npy_int32 *)%(x_nrows)s->data)[0]) || (PyArray_DIMS(%(zn)s)[0] != ((npy_int32 *)%(x_nrows)s->data)[0])
|| (%(zn)s->dimensions[1] != %(y)s->dimensions[1]) || (PyArray_DIMS(%(zn)s)[1] != PyArray_DIMS(%(y)s)[1])
) )
{ {
{Py_XDECREF(%(zn)s);} {Py_XDECREF(%(zn)s);}
npy_intp dims[] = {0, 0}; npy_intp dims[] = {0, 0};
dims[0] = ((npy_int32 *)%(x_nrows)s->data)[0]; dims[0] = ((npy_int32 *)%(x_nrows)s->data)[0];
dims[1] = %(y)s->dimensions[1]; dims[1] = PyArray_DIMS(%(y)s)[1];
%(zn)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_zn)s); %(zn)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_zn)s);
} }
{ {
// sparse array has size MxK, dense KxN, output MxN // sparse array has size MxK, dense KxN, output MxN
npy_intp M = %(zn)s->dimensions[0]; npy_intp M = PyArray_DIMS(%(zn)s)[0];
npy_intp N = %(zn)s->dimensions[1]; npy_intp N = PyArray_DIMS(%(zn)s)[1];
npy_intp K = %(y)s->dimensions[0]; npy_intp K = PyArray_DIMS(%(y)s)[0];
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
const dtype_%(x_val)s* __restrict__ Dval = (dtype_%(x_val)s*)%(x_val)s->data; const dtype_%(x_val)s* __restrict__ Dval = (dtype_%(x_val)s*)%(x_val)s->data;
...@@ -619,12 +620,12 @@ class UsmmCscDense(gof.Op): ...@@ -619,12 +620,12 @@ class UsmmCscDense(gof.Op):
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(x_ptr)s->data; const npy_int32 * __restrict__ Dptr = (npy_int32*)%(x_ptr)s->data;
const dtype_%(alpha)s alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0]; const dtype_%(alpha)s alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0];
npy_intp Sz = %(z)s->strides[1] / %(z)s->descr->elsize; npy_intp Sz = %(z)s->strides[1] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Szn = %(zn)s->strides[1] / %(zn)s->descr->elsize; npy_intp Szn = %(zn)s->strides[1] / PyArray_DESCR(%(zn)s)->elsize;
npy_intp Sval = %(x_val)s->strides[0] / %(x_val)s->descr->elsize; npy_intp Sval = %(x_val)s->strides[0] / PyArray_DESCR(%(x_val)s)->elsize;
npy_intp Sind = %(x_ind)s->strides[0] / %(x_ind)s->descr->elsize; npy_intp Sind = %(x_ind)s->strides[0] / PyArray_DESCR(%(x_ind)s)->elsize;
npy_intp Sptr = %(x_ptr)s->strides[0] / %(x_ptr)s->descr->elsize; npy_intp Sptr = %(x_ptr)s->strides[0] / PyArray_DESCR(%(x_ptr)s)->elsize;
npy_intp Sy = %(y)s->strides[1] / %(y)s->descr->elsize; npy_intp Sy = %(y)s->strides[1] / PyArray_DESCR(%(y)s)->elsize;
if (!(%(inplace)s)) if (!(%(inplace)s))
...@@ -742,58 +743,58 @@ class CSMGradC(gof.Op): ...@@ -742,58 +743,58 @@ class CSMGradC(gof.Op):
raise NotImplementedError('Complex types are not supported for b_val') raise NotImplementedError('Complex types are not supported for b_val')
return """ return """
if (%(a_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;} if (PyArray_NDIM(%(a_val)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;}
if (%(a_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;} if (PyArray_NDIM(%(a_ind)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
if (%(a_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;} if (PyArray_NDIM(%(a_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (%(b_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_val) != 1"); %(fail)s;} if (PyArray_NDIM(%(b_val)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_val) != 1"); %(fail)s;}
if (%(b_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ind) != 1"); %(fail)s;} if (PyArray_NDIM(%(b_ind)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ind) != 1"); %(fail)s;}
if (%(b_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ptr) != 1"); %(fail)s;} if (PyArray_NDIM(%(b_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ptr) != 1"); %(fail)s;}
if (%(a_ind)s->descr->type_num != PyArray_INT32) { if (PyArray_DESCR(%(a_ind)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (%(a_ptr)s->descr->type_num != PyArray_INT32) if (PyArray_DESCR(%(a_ptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (%(b_ind)s->descr->type_num != PyArray_INT32) { if (PyArray_DESCR(%(b_ind)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "b_ind dtype not INT32"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "b_ind dtype not INT32"); %(fail)s;}
if (%(b_ptr)s->descr->type_num != PyArray_INT32) if (PyArray_DESCR(%(b_ptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "b_ptr dtype not INT32"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "b_ptr dtype not INT32"); %(fail)s;}
if (%(a_val)s->dimensions[0] != %(a_ind)s->dimensions[0]) if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "a_val and a_ind have different lengths"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_val and a_ind have different lengths"); %(fail)s;}
if (%(b_val)s->dimensions[0] != %(b_ind)s->dimensions[0]) if (PyArray_DIMS(%(b_val)s)[0] != PyArray_DIMS(%(b_ind)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "b_val and b_ind have different lengths"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "b_val and b_ind have different lengths"); %(fail)s;}
if (%(a_ptr)s->dimensions[0] != %(b_ptr)s->dimensions[0]) if (PyArray_DIMS(%(a_ptr)s)[0] != PyArray_DIMS(%(b_ptr)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr and b_ptr have different lengths"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "a_ptr and b_ptr have different lengths"); %(fail)s;}
if ((!%(z)s) || (%(z)s->dimensions[0] != %(a_val)s->dimensions[0])) if ((!%(z)s) || (PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(a_val)s)[0]))
{ {
{Py_XDECREF(%(z)s);} {Py_XDECREF(%(z)s);}
npy_intp dims[] = {0}; npy_intp dims[] = {0};
dims[0] = %(a_val)s->dimensions[0]; dims[0] = PyArray_DIMS(%(a_val)s)[0];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, %(typenum_z)s); %(z)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, %(typenum_z)s);
} }
{ {
// sparse array has size MxK, dense KxN, output MxN // sparse array has size MxK, dense KxN, output MxN
npy_intp M = %(a_ptr)s->dimensions[0] - 1; npy_intp M = PyArray_DIMS(%(a_ptr)s)[0] - 1;
npy_intp a_dim_0 = ((npy_int32 *)%(a_dim)s->data)[0]; npy_intp a_dim_0 = ((npy_int32 *)%(a_dim)s->data)[0];
npy_intp a_dim_1 = ((npy_int32 *)%(a_dim)s->data)[1]; npy_intp a_dim_1 = ((npy_int32 *)%(a_dim)s->data)[1];
npy_intp sp_dim = (M == a_dim_0)?a_dim_1:a_dim_0; npy_intp sp_dim = (M == a_dim_0)?a_dim_1:a_dim_0;
// strides tell you how many bytes to skip to go to next column/row entry // strides tell you how many bytes to skip to go to next column/row entry
npy_intp Sz = %(z)s->strides[0] / %(z)s->descr->elsize; npy_intp Sz = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Sa_val = %(a_val)s->strides[0] / %(a_val)s->descr->elsize; npy_intp Sa_val = %(a_val)s->strides[0] / PyArray_DESCR(%(a_val)s)->elsize;
npy_intp Sa_ind = %(a_ind)s->strides[0] / %(a_ind)s->descr->elsize; npy_intp Sa_ind = %(a_ind)s->strides[0] / PyArray_DESCR(%(a_ind)s)->elsize;
npy_intp Sa_ptr = %(a_ptr)s->strides[0] / %(a_ptr)s->descr->elsize; npy_intp Sa_ptr = %(a_ptr)s->strides[0] / PyArray_DESCR(%(a_ptr)s)->elsize;
npy_intp Sb_val = %(b_val)s->strides[0] / %(b_val)s->descr->elsize; npy_intp Sb_val = %(b_val)s->strides[0] / PyArray_DESCR(%(b_val)s)->elsize;
npy_intp Sb_ind = %(b_ind)s->strides[0] / %(b_ind)s->descr->elsize; npy_intp Sb_ind = %(b_ind)s->strides[0] / PyArray_DESCR(%(b_ind)s)->elsize;
npy_intp Sb_ptr = %(b_ptr)s->strides[0] / %(b_ptr)s->descr->elsize; npy_intp Sb_ptr = %(b_ptr)s->strides[0] / PyArray_DESCR(%(b_ptr)s)->elsize;
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data; dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data;
...@@ -804,7 +805,7 @@ class CSMGradC(gof.Op): ...@@ -804,7 +805,7 @@ class CSMGradC(gof.Op):
const npy_int32 * __restrict__ Db_ind = (npy_int32*)%(b_ind)s->data; const npy_int32 * __restrict__ Db_ind = (npy_int32*)%(b_ind)s->data;
const npy_int32 * __restrict__ Db_ptr = (npy_int32*)%(b_ptr)s->data; const npy_int32 * __restrict__ Db_ptr = (npy_int32*)%(b_ptr)s->data;
npy_intp nnz = %(a_ind)s->dimensions[0]; npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0];
dtype_%(b_val)s b_row[sp_dim]; dtype_%(b_val)s b_row[sp_dim];
...@@ -897,32 +898,32 @@ class MulSDCSC(gof.Op): ...@@ -897,32 +898,32 @@ class MulSDCSC(gof.Op):
raise NotImplementedError('Complex types are not supported for b') raise NotImplementedError('Complex types are not supported for b')
return """ return """
if (%(_b)s->nd != 2) { if (PyArray_NDIM(%(_b)s) != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2");
%(fail)s;} %(fail)s;}
if (%(_data)s->nd != 1) { if (PyArray_NDIM(%(_data)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(data) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(data) != 1");
%(fail)s;} %(fail)s;}
if (%(_indices)s->nd != 1) { if (PyArray_NDIM(%(_indices)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)s;} %(fail)s;}
if (%(_indptr)s->nd != 1) { if (PyArray_NDIM(%(_indptr)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)s;} %(fail)s;}
if( %(_indices)s->descr->type_num != PyArray_INT32) { if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( %(_indptr)s->descr->type_num != PyArray_INT32) if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s) if (!%(_zout)s)
{ {
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_indices)s->dimensions, %(_b)s->descr->type_num); PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num);
} }
if (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0]) if (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0])
{ {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_NotImplementedError,
"somehow _zout got the wrong size.. and I don't know how to resize it."); "somehow _zout got the wrong size.. and I don't know how to resize it.");
...@@ -930,9 +931,9 @@ class MulSDCSC(gof.Op): ...@@ -930,9 +931,9 @@ class MulSDCSC(gof.Op):
} }
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
const npy_intp nnz = %(_indices)s->dimensions[0]; const npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
//TODO: error checking with this //TODO: error checking with this
const npy_intp N = %(_indptr)s->dimensions[0]-1; const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data; const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
...@@ -1011,32 +1012,32 @@ class MulSDCSR(gof.Op): ...@@ -1011,32 +1012,32 @@ class MulSDCSR(gof.Op):
raise NotImplementedError('Complex types are not supported for b') raise NotImplementedError('Complex types are not supported for b')
return """ return """
if (%(_b)s->nd != 2) { if (PyArray_NDIM(%(_b)s) != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2");
%(fail)s;} %(fail)s;}
if (%(_data)s->nd != 1) { if (PyArray_NDIM(%(_data)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(data) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(data) != 1");
%(fail)s;} %(fail)s;}
if (%(_indices)s->nd != 1) { if (PyArray_NDIM(%(_indices)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)s;} %(fail)s;}
if (%(_indptr)s->nd != 1) { if (PyArray_NDIM(%(_indptr)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)s;} %(fail)s;}
if( %(_indices)s->descr->type_num != PyArray_INT32) { if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( %(_indptr)s->descr->type_num != PyArray_INT32) if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s) if (!%(_zout)s)
{ {
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_indices)s->dimensions, %(_b)s->descr->type_num); PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num);
} }
if (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0]) if (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0])
{ {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_NotImplementedError,
"somehow _zout got the wrong size.. and I don't know how to resize it."); "somehow _zout got the wrong size.. and I don't know how to resize it.");
...@@ -1044,9 +1045,9 @@ class MulSDCSR(gof.Op): ...@@ -1044,9 +1045,9 @@ class MulSDCSR(gof.Op):
} }
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
const npy_intp nnz = %(_indices)s->dimensions[0]; const npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
//TODO: error checking with this //TODO: error checking with this
const npy_intp N = %(_indptr)s->dimensions[0]-1; const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data; const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
...@@ -1161,42 +1162,42 @@ class MulSVCSR(gof.Op): ...@@ -1161,42 +1162,42 @@ class MulSVCSR(gof.Op):
raise NotImplementedError('Complex types are not supported for b') raise NotImplementedError('Complex types are not supported for b')
return """ return """
if (%(_b)s->nd != 1) { if (PyArray_NDIM(%(_b)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 1");
%(fail)s; %(fail)s;
} }
if (%(_data)s->nd != 1) { if (PyArray_NDIM(%(_data)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(data) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(data) != 1");
%(fail)s; %(fail)s;
} }
if (%(_indices)s->nd != 1) { if (PyArray_NDIM(%(_indices)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)s; %(fail)s;
} }
if (%(_indptr)s->nd != 1) { if (PyArray_NDIM(%(_indptr)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)s; %(fail)s;
} }
if( %(_indices)s->descr->type_num != PyArray_INT32) { if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( %(_indptr)s->descr->type_num != PyArray_INT32) if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s if (!%(_zout)s
|| %(_zout)s->dimensions[0] != %(_indices)s->dimensions[0] || PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0]
|| !PyArray_ISCONTIGUOUS(%(_zout)s)) || !PyArray_ISCONTIGUOUS(%(_zout)s))
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_indices)s->dimensions, %(_b)s->descr->type_num); PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num);
} }
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
const npy_intp nnz = %(_indices)s->dimensions[0]; const npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
//TODO: error checking with this //TODO: error checking with this
const npy_intp N = %(_indptr)s->dimensions[0]-1; const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data; const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
...@@ -1206,7 +1207,7 @@ class MulSVCSR(gof.Op): ...@@ -1206,7 +1207,7 @@ class MulSVCSR(gof.Op):
dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data; dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data;
const npy_intp Sb = %(_b)s->strides[0] / %(_b)s->descr->elsize; const npy_intp Sb = %(_b)s->strides[0] / PyArray_DESCR(%(_b)s)->elsize;
// loop over rows // loop over rows
for (npy_int32 j = 0; j < N; ++j) for (npy_int32 j = 0; j < N; ++j)
...@@ -1311,36 +1312,36 @@ class StructuredAddSVCSR(gof.Op): ...@@ -1311,36 +1312,36 @@ class StructuredAddSVCSR(gof.Op):
raise NotImplementedError('Complex types are not supported for b') raise NotImplementedError('Complex types are not supported for b')
return """ return """
if (%(_b)s->nd != 1) { if (PyArray_NDIM(%(_b)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 1");
%(fail)s; %(fail)s;
} }
if (%(_data)s->nd != 1) { if (PyArray_NDIM(%(_data)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(data) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(data) != 1");
%(fail)s; %(fail)s;
} }
if (%(_indices)s->nd != 1) { if (PyArray_NDIM(%(_indices)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)s; %(fail)s;
} }
if (%(_indptr)s->nd != 1) { if (PyArray_NDIM(%(_indptr)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)s; %(fail)s;
} }
if( %(_indices)s->descr->type_num != PyArray_INT32) { if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( %(_indptr)s->descr->type_num != PyArray_INT32) if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s) if (!%(_zout)s)
{ {
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_indices)s->dimensions, %(_b)s->descr->type_num); PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num);
} }
if (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0]) if (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0])
{ {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_NotImplementedError,
"somehow _zout got the wrong size.. and I don't know how to resize it."); "somehow _zout got the wrong size.. and I don't know how to resize it.");
...@@ -1348,9 +1349,9 @@ class StructuredAddSVCSR(gof.Op): ...@@ -1348,9 +1349,9 @@ class StructuredAddSVCSR(gof.Op):
} }
{ //makes it compile even though labels jump over variable definitions. { //makes it compile even though labels jump over variable definitions.
const npy_intp nnz = %(_indices)s->dimensions[0]; const npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
//TODO: error checking with this //TODO: error checking with this
const npy_intp N = %(_indptr)s->dimensions[0]-1; const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data; const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
...@@ -1360,7 +1361,7 @@ class StructuredAddSVCSR(gof.Op): ...@@ -1360,7 +1361,7 @@ class StructuredAddSVCSR(gof.Op):
dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data; dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data;
const npy_intp Sb = %(_b)s->strides[0] / %(_b)s->descr->elsize; const npy_intp Sb = %(_b)s->strides[0] / PyArray_DESCR(%(_b)s)->elsize;
// loop over columns // loop over columns
for (npy_int32 j = 0; j < N; ++j) for (npy_int32 j = 0; j < N; ++j)
...@@ -1537,70 +1538,70 @@ class SamplingDotCSR(gof.Op): ...@@ -1537,70 +1538,70 @@ class SamplingDotCSR(gof.Op):
[]).dtype_specs()[-1] []).dtype_specs()[-1]
rval = """ rval = """
if (%(x)s->nd != 2) { if (PyArray_NDIM(%(x)s) != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;}
if (%(y)s->nd != 2) { if (PyArray_NDIM(%(y)s) != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
if (%(x)s->descr->type_num != %(typenum_x)s) { if (PyArray_DESCR(%(x)s)->type_num != %(typenum_x)s) {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for x"); "Invalid type for x");
%(fail)s;} %(fail)s;}
if (%(y)s->descr->type_num != %(typenum_y)s) { if (PyArray_DESCR(%(y)s)->type_num != %(typenum_y)s) {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for y"); "Invalid type for y");
%(fail)s;} %(fail)s;}
if (%(p_data)s->descr->type_num != %(typenum_p)s) { if (PyArray_DESCR(%(p_data)s)->type_num != %(typenum_p)s) {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for pattern"); "Invalid type for pattern");
%(fail)s;} %(fail)s;}
if (%(x)s->dimensions[1] != %(y)s->dimensions[1]) { if (PyArray_DIMS(%(x)s)[1] != PyArray_DIMS(%(y)s)[1]) {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_NotImplementedError,
"x's number of columns doesn't match y's rows! Note: sampling_dot is different from dot because y is assumed to be transposed."); "x's number of columns doesn't match y's rows! Note: sampling_dot is different from dot because y is assumed to be transposed.");
%(fail)s;} %(fail)s;}
if (%(y)s->dimensions[0] != ((npy_int32 *)%(p_ncols)s->data)[0] || if (PyArray_DIMS(%(y)s)[0] != ((npy_int32 *)%(p_ncols)s->data)[0] ||
%(x)s->dimensions[0] != (%(p_ptr)s->dimensions[0] - 1)) PyArray_DIMS(%(x)s)[0] != (PyArray_DIMS(%(p_ptr)s)[0] - 1))
{PyErr_SetString(PyExc_NotImplementedError, {PyErr_SetString(PyExc_NotImplementedError,
"The dimension of the pattern and the output must match"); %(fail)s;} "The dimension of the pattern and the output must match"); %(fail)s;}
// Allocate output // Allocate output
if (!%(z_data)s if (!%(z_data)s
|| (%(z_data)s->dimensions[0] != %(p_data)s->dimensions[0]) || (PyArray_DIMS(%(z_data)s)[0] != PyArray_DIMS(%(p_data)s)[0])
|| (%(z_data)s->descr->type_num != %(typenum_zd)s)) { || (PyArray_DESCR(%(z_data)s)->type_num != %(typenum_zd)s)) {
{Py_XDECREF(%(z_data)s);} {Py_XDECREF(%(z_data)s);}
npy_intp dims[] = {0}; npy_intp dims[] = {0};
dims[0] = %(p_data)s->dimensions[0]; dims[0] = PyArray_DIMS(%(p_data)s)[0];
%(z_data)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, %(z_data)s = (PyArrayObject*) PyArray_SimpleNew(1, dims,
%(typenum_zd)s); %(typenum_zd)s);
} }
if (!%(z_ind)s if (!%(z_ind)s
|| (%(z_ind)s->dimensions[0] != %(p_ind)s->dimensions[0]) || (PyArray_DIMS(%(z_ind)s)[0] != PyArray_DIMS(%(p_ind)s)[0])
|| (%(z_ind)s->descr->type_num != %(typenum_zi)s)) { || (PyArray_DESCR(%(z_ind)s)->type_num != %(typenum_zi)s)) {
{Py_XDECREF(%(z_ind)s);} {Py_XDECREF(%(z_ind)s);}
npy_intp dims[] = {0}; npy_intp dims[] = {0};
dims[0] = %(p_ind)s->dimensions[0]; dims[0] = PyArray_DIMS(%(p_ind)s)[0];
%(z_ind)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, %(z_ind)s = (PyArrayObject*) PyArray_SimpleNew(1, dims,
%(typenum_zi)s); %(typenum_zi)s);
} }
if (!%(z_ptr)s if (!%(z_ptr)s
|| (%(z_ptr)s->dimensions[0] != %(p_ptr)s->dimensions[0]) || (PyArray_DIMS(%(z_ptr)s)[0] != PyArray_DIMS(%(p_ptr)s)[0])
|| (%(z_ptr)s->descr->type_num != %(typenum_zp)s)) { || (PyArray_DESCR(%(z_ptr)s)->type_num != %(typenum_zp)s)) {
{Py_XDECREF(%(z_ptr)s);} {Py_XDECREF(%(z_ptr)s);}
npy_intp dims[] = {0}; npy_intp dims[] = {0};
dims[0] = %(p_ptr)s->dimensions[0]; dims[0] = PyArray_DIMS(%(p_ptr)s)[0];
%(z_ptr)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, %(z_ptr)s = (PyArrayObject*) PyArray_SimpleNew(1, dims,
%(typenum_zp)s); %(typenum_zp)s);
} }
{ {
// Product of MxK and NxK, output MxN // Product of MxK and NxK, output MxN
npy_intp M = %(x)s->dimensions[0]; npy_intp M = PyArray_DIMS(%(x)s)[0];
npy_intp N = %(y)s->dimensions[0]; npy_intp N = PyArray_DIMS(%(y)s)[0];
npy_intp K = %(y)s->dimensions[1]; npy_intp K = PyArray_DIMS(%(y)s)[1];
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
const dtype_%(x)s* __restrict__ Dx = (dtype_%(x)s*)%(x)s->data; const dtype_%(x)s* __restrict__ Dx = (dtype_%(x)s*)%(x)s->data;
...@@ -1612,17 +1613,17 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;} ...@@ -1612,17 +1613,17 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
dtype_%(z_ind)s* __restrict__ Dzi = (dtype_%(z_ind)s*)%(z_ind)s->data; dtype_%(z_ind)s* __restrict__ Dzi = (dtype_%(z_ind)s*)%(z_ind)s->data;
dtype_%(z_ptr)s* __restrict__ Dzp = (dtype_%(z_ptr)s*)%(z_ptr)s->data; dtype_%(z_ptr)s* __restrict__ Dzp = (dtype_%(z_ptr)s*)%(z_ptr)s->data;
const npy_intp Sdx = %(x)s->strides[1]/%(x)s->descr->elsize; const npy_intp Sdx = %(x)s->strides[1]/PyArray_DESCR(%(x)s)->elsize;
const npy_intp Sdy = %(y)s->strides[1]/%(y)s->descr->elsize; const npy_intp Sdy = %(y)s->strides[1]/PyArray_DESCR(%(y)s)->elsize;
const npy_intp Sdpd = %(p_data)s->strides[0] / %(p_data)s->descr->elsize; const npy_intp Sdpd = %(p_data)s->strides[0] / PyArray_DESCR(%(p_data)s)->elsize;
const npy_intp Sdpi = %(p_ind)s->strides[0] / %(p_ind)s->descr->elsize; const npy_intp Sdpi = %(p_ind)s->strides[0] / PyArray_DESCR(%(p_ind)s)->elsize;
const npy_intp Sdpp = %(p_ptr)s->strides[0] / %(p_ptr)s->descr->elsize; const npy_intp Sdpp = %(p_ptr)s->strides[0] / PyArray_DESCR(%(p_ptr)s)->elsize;
const npy_intp Sdzd = %(z_data)s->strides[0] / %(z_data)s->descr->elsize; const npy_intp Sdzd = %(z_data)s->strides[0] / PyArray_DESCR(%(z_data)s)->elsize;
const npy_intp Sdzi = %(z_ind)s->strides[0] / %(z_ind)s->descr->elsize; const npy_intp Sdzi = %(z_ind)s->strides[0] / PyArray_DESCR(%(z_ind)s)->elsize;
const npy_intp Sdzp = %(z_ptr)s->strides[0] / %(z_ptr)s->descr->elsize; const npy_intp Sdzp = %(z_ptr)s->strides[0] / PyArray_DESCR(%(z_ptr)s)->elsize;
memcpy(Dzi, Dpi, %(p_ind)s->dimensions[0]*sizeof(dtype_%(p_ind)s)); memcpy(Dzi, Dpi, PyArray_DIMS(%(p_ind)s)[0]*sizeof(dtype_%(p_ind)s));
memcpy(Dzp, Dpp, %(p_ptr)s->dimensions[0]*sizeof(dtype_%(p_ptr)s)); memcpy(Dzp, Dpp, PyArray_DIMS(%(p_ptr)s)[0]*sizeof(dtype_%(p_ptr)s));
for (npy_int32 m = 0; m < M; ++m) { for (npy_int32 m = 0; m < M; ++m) {
for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) { for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
......
...@@ -1374,7 +1374,7 @@ def test_sparse_shared_memory(): ...@@ -1374,7 +1374,7 @@ def test_sparse_shared_memory():
theano.In(y, mutable=True)], z, mode='FAST_RUN') theano.In(y, mutable=True)], z, mode='FAST_RUN')
def f_(x, y, m1=m1, m2=m2): def f_(x, y, m1=m1, m2=m2):
return numpy.dot(x * 3, m1) + numpy.dot(y * 2, m2) return ((x * 3) * m1) + ((y * 2) * m2)
assert SparseType.may_share_memory(a, a) # This is trivial assert SparseType.may_share_memory(a, a) # This is trivial
result = f(a, a) result = f(a, a)
......
...@@ -1972,7 +1972,7 @@ class ScalarFromTensor(Op): ...@@ -1972,7 +1972,7 @@ class ScalarFromTensor(Op):
z, = outputs z, = outputs
fail = sub['fail'] fail = sub['fail']
return """ return """
%(z)s = ((dtype_%(x)s*)(%(x)s->data))[0]; %(z)s = ((dtype_%(x)s*)(PyArray_DATA(%(x)s)))[0];
""" % locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
...@@ -3087,14 +3087,14 @@ class Alloc(gof.Op): ...@@ -3087,14 +3087,14 @@ class Alloc(gof.Op):
# Initialize shape # Initialize shape
for i, shp_i in enumerate(inp[1:]): for i, shp_i in enumerate(inp[1:]):
code += """ code += """
shape[%(i)s] = ((dtype_%(shp_i)s*) %(shp_i)s->data)[0]; shape[%(i)s] = ((dtype_%(shp_i)s*) PyArray_DATA(%(shp_i)s))[0];
""" % dict(i=i, shp_i=shp_i) """ % dict(i=i, shp_i=shp_i)
code += """ code += """
int need_new_out = (NULL == %(zz)s); int need_new_out = (NULL == %(zz)s);
for (int i = 0; i < %(ndim)s; i++) for (int i = 0; i < %(ndim)s; i++)
need_new_out = (need_new_out need_new_out = (need_new_out
|| (%(zz)s->dimensions[i] != shape[i])); || (PyArray_DIMS(%(zz)s)[i] != shape[i]));
if (need_new_out) if (need_new_out)
{ {
...@@ -4042,14 +4042,14 @@ class Subtensor(Op): ...@@ -4042,14 +4042,14 @@ class Subtensor(Op):
//TODO: give this Op a second output so that this view can be cached //TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure //TODO: alternatively, fix the memory leak on failure
Py_INCREF(%(x)s->descr); Py_INCREF(PyArray_DESCR(%(x)s));
PyArrayObject * xview = (PyArrayObject*)PyArray_NewFromDescr( PyArrayObject * xview = (PyArrayObject*)PyArray_NewFromDescr(
&PyArray_Type, &PyArray_Type,
%(x)s->descr, PyArray_DESCR(%(x)s),
%(view_ndim)s, %(view_ndim)s,
%(x)s->dimensions, PyArray_DIMS(%(x)s),
%(x)s->strides, PyArray_STRIDES(%(x)s),
%(x)s->data, PyArray_DATA(%(x)s),
%(x)s->flags, %(x)s->flags,
NULL); NULL);
if (!xview) if (!xview)
...@@ -4057,22 +4057,22 @@ class Subtensor(Op): ...@@ -4057,22 +4057,22 @@ class Subtensor(Op):
%(fail)s; %(fail)s;
} }
if ((xview->dimensions == %(x)s->dimensions) if ((PyArray_DIMS(xview) == PyArray_DIMS(%(x)s))
&& (%(x)s->dimensions != NULL)) && (PyArray_DIMS(%(x)s) != NULL))
{ {
PyErr_Format(PyExc_ValueError, "x and xview" PyErr_Format(PyExc_ValueError, "x and xview"
"(with %%d dims) have the same dimensions" "(with %%d dims) have the same dimensions"
" pointers: %%p and %%p", " pointers: %%p and %%p",
%(x)s->nd, xview->dimensions, %(x)s->dimensions); PyArray_NDIM(%(x)s), PyArray_DIMS(xview), PyArray_DIMS(%(x)s));
%(fail)s; %(fail)s;
} }
if (xview->strides == %(x)s->strides if (PyArray_STRIDES(xview) == PyArray_STRIDES(%(x)s)
&& (%(x)s->dimensions != NULL)) && (PyArray_DIMS(%(x)s) != NULL))
{ {
PyErr_Format(PyExc_ValueError, "x and xview" PyErr_Format(PyExc_ValueError, "x and xview"
"(with %%d dims) have the same strides" "(with %%d dims) have the same strides"
" pointers: %%p and %%p", " pointers: %%p and %%p",
%(x)s->nd, xview->strides, %(x)s->strides); PyArray_NDIM(%(x)s), PyArray_STRIDES(xview), PyArray_STRIDES(%(x)s));
%(fail)s; %(fail)s;
} }
...@@ -4080,7 +4080,7 @@ class Subtensor(Op): ...@@ -4080,7 +4080,7 @@ class Subtensor(Op):
{ {
if (is_slice[outer_ii]) if (is_slice[outer_ii])
{ {
npy_intp length = %(x)s->dimensions[outer_ii]; npy_intp length = PyArray_DIMS(%(x)s)[outer_ii];
npy_intp slicelength; npy_intp slicelength;
npy_intp start = subtensor_spec[spec_pos+0]; npy_intp start = subtensor_spec[spec_pos+0];
npy_intp stop = subtensor_spec[spec_pos+1]; npy_intp stop = subtensor_spec[spec_pos+1];
...@@ -4144,9 +4144,9 @@ class Subtensor(Op): ...@@ -4144,9 +4144,9 @@ class Subtensor(Op):
} }
assert (slicelength <= length); assert (slicelength <= length);
xview->data += %(x)s->strides[outer_ii] * start; xview->data += PyArray_STRIDES(%(x)s)[outer_ii] * start;
xview->dimensions[inner_ii] = slicelength; PyArray_DIMS(xview)[inner_ii] = slicelength;
xview->strides[inner_ii] = %(x)s->strides[outer_ii] * step; PyArray_STRIDES(xview)[inner_ii] = PyArray_STRIDES(%(x)s)[outer_ii] * step;
inner_ii += 1; inner_ii += 1;
spec_pos += 3; spec_pos += 3;
...@@ -4154,12 +4154,12 @@ class Subtensor(Op): ...@@ -4154,12 +4154,12 @@ class Subtensor(Op):
else // tuple coord `outer_ii` is an int else // tuple coord `outer_ii` is an int
{ {
int idx = subtensor_spec[spec_pos]; int idx = subtensor_spec[spec_pos];
if (idx < 0) idx += %(x)s->dimensions[outer_ii]; if (idx < 0) idx += PyArray_DIMS(%(x)s)[outer_ii];
if (idx >= 0) if (idx >= 0)
{ {
if (idx < %(x)s->dimensions[outer_ii]) if (idx < PyArray_DIMS(%(x)s)[outer_ii])
{ {
xview->data += %(x)s->strides[outer_ii] * idx; xview->data += PyArray_STRIDES(%(x)s)[outer_ii] * idx;
} }
else else
{ {
...@@ -4176,16 +4176,16 @@ class Subtensor(Op): ...@@ -4176,16 +4176,16 @@ class Subtensor(Op):
spec_pos += 1; spec_pos += 1;
} }
} }
assert (inner_ii <= xview->nd); assert (inner_ii <= PyArray_NDIM(xview));
while (inner_ii < xview->nd) while (inner_ii < PyArray_NDIM(xview))
{ {
assert (outer_ii < %(x)s->nd); assert (outer_ii < PyArray_NDIM(%(x)s));
xview->dimensions[inner_ii] = %(x)s->dimensions[outer_ii]; PyArray_DIMS(xview)[inner_ii] = PyArray_DIMS(%(x)s)[outer_ii];
xview->strides[inner_ii] = %(x)s->strides[outer_ii]; PyArray_STRIDES(xview)[inner_ii] = PyArray_STRIDES(%(x)s)[outer_ii];
inner_ii += 1; inner_ii += 1;
outer_ii += 1; outer_ii += 1;
} }
PyArray_UpdateFlags(xview, NPY_C_CONTIGUOUS|NPY_F_CONTIGUOUS); PyArray_UpdateFlags(xview, NPY_ARRAY_C_CONTIGUOUS|NPY_F_CONTIGUOUS);
""" % locals() """ % locals()
# print rval # print rval
return rval return rval
...@@ -4203,7 +4203,7 @@ class Subtensor(Op): ...@@ -4203,7 +4203,7 @@ class Subtensor(Op):
part1 = """ part1 = """
if (%(z)s) Py_DECREF(%(z)s); if (%(z)s) Py_DECREF(%(z)s);
Py_INCREF(py_%(x)s); Py_INCREF(py_%(x)s);
xview->base = py_%(x)s; PyArray_BASE(xview) = py_%(x)s;
assert(py_%(x)s == (PyObject*)%(x)s); assert(py_%(x)s == (PyObject*)%(x)s);
%(z)s = xview; %(z)s = xview;
""" % locals() """ % locals()
...@@ -4504,7 +4504,7 @@ class IncSubtensor(Op): ...@@ -4504,7 +4504,7 @@ class IncSubtensor(Op):
{ {
if (%(z)s) Py_DECREF(%(z)s); if (%(z)s) Py_DECREF(%(z)s);
%(z)s = (PyArrayObject*)PyArray_FromAny(py_%(x)s, NULL, 0, 0, %(z)s = (PyArrayObject*)PyArray_FromAny(py_%(x)s, NULL, 0, 0,
NPY_ENSURECOPY, NULL); NPY_ARRAY_ENSURECOPY, NULL);
} }
""" % locals() """ % locals()
...@@ -4529,7 +4529,7 @@ class IncSubtensor(Op): ...@@ -4529,7 +4529,7 @@ class IncSubtensor(Op):
if (add_rval) if (add_rval)
{ {
assert (PyArray_Check((PyObject*)add_rval)); assert (PyArray_Check((PyObject*)add_rval));
assert (add_rval->data == xview->data); assert (PyArray_DATA(add_rval) == PyArray_DATA(xview));
Py_DECREF(add_rval); Py_DECREF(add_rval);
} }
else else
...@@ -5373,7 +5373,7 @@ class Reshape(Op): ...@@ -5373,7 +5373,7 @@ class Reshape(Op):
new_ndim = self.ndim new_ndim = self.ndim
fail = sub['fail'] fail = sub['fail']
return """ return """
assert (%(shp)s->nd == 1); assert (PyArray_NDIM(%(shp)s) == 1);
npy_intp new_dims[%(new_ndim)s]; npy_intp new_dims[%(new_ndim)s];
PyArray_Dims newshape; PyArray_Dims newshape;
newshape.ptr = new_dims; newshape.ptr = new_dims;
...@@ -5385,7 +5385,7 @@ class Reshape(Op): ...@@ -5385,7 +5385,7 @@ class Reshape(Op):
// -- will err if this will downcast. This could happen if the // -- will err if this will downcast. This could happen if the
// -- user pass an int64 dtype, but npy_intp endup being int32. // -- user pass an int64 dtype, but npy_intp endup being int32.
new_dims[ii] = ((dtype_%(shp)s*)( new_dims[ii] = ((dtype_%(shp)s*)(
%(shp)s->data + ii * %(shp)s->strides[0]))[0]; PyArray_DATA(%(shp)s) + ii * PyArray_STRIDES(%(shp)s)[0]))[0];
} }
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = (PyArrayObject *) PyArray_Newshape(%(x)s, &newshape, %(z)s = (PyArrayObject *) PyArray_Newshape(%(x)s, &newshape,
......
...@@ -493,16 +493,16 @@ class GemmRelated(Op): ...@@ -493,16 +493,16 @@ class GemmRelated(Op):
declare_NS = """ declare_NS = """
int unit = 0; int unit = 0;
int type_num = %(_x)s->descr->type_num; int type_num = PyArray_DESCR(%(_x)s)->type_num;
int type_size = %(_x)s->descr->elsize; // in bytes int type_size = PyArray_DESCR(%(_x)s)->elsize; // in bytes
npy_intp* Nx = %(_x)s->dimensions; npy_intp* Nx = PyArray_DIMS(%(_x)s);
npy_intp* Ny = %(_y)s->dimensions; npy_intp* Ny = PyArray_DIMS(%(_y)s);
npy_intp* Nz = 0; //%(_zout)s->dimensions; npy_intp* Nz = 0; //PyArray_DIMS(%(_zout)s);
npy_intp* Sx = %(_x)s->strides; npy_intp* Sx = PyArray_STRIDES(%(_x)s);
npy_intp* Sy = %(_y)s->strides; npy_intp* Sy = PyArray_STRIDES(%(_y)s);
npy_intp* Sz = 0; //%(_zout)s->strides; npy_intp* Sz = 0; //PyArray_STRIDES(%(_zout)s);
//strides for x, y, z in dimensions 0, 1 //strides for x, y, z in dimensions 0, 1
int sx_0, sx_1, sy_0, sy_1, sz_0, sz_1; int sx_0, sx_1, sy_0, sy_1, sz_0, sz_1;
...@@ -511,39 +511,49 @@ class GemmRelated(Op): ...@@ -511,39 +511,49 @@ class GemmRelated(Op):
#setup_z_Nz_Sz = None #setup_z_Nz_Sz = None
check_xyz_rank2 = """ check_xyz_rank2 = """
if (%(_x)s->nd != 2) { if (PyArray_NDIM(%(_x)s) != 2) {
PyErr_Format(PyExc_NotImplementedError, "rank(x) != 2. rank(x) is %%d.", %(_x)s->nd); %(fail)s;} PyErr_Format(PyExc_NotImplementedError,
if (%(_y)s->nd != 2) { "rank(x) != 2. rank(x) is %%d.",
PyErr_Format(PyExc_NotImplementedError, "rank(y) != 2. rank(y) is %%d.", %(_y)s->nd); %(fail)s;} PyArray_NDIM(%(_x)s));
if (%(_zout)s && %(_zout)s->nd != 2) { %(fail)s;
PyErr_Format(PyExc_NotImplementedError, "rank(z) != 2. rank(z) is %%d.", %(_zout)s->nd); %(fail)s;} }
if (PyArray_NDIM(%(_y)s) != 2) {
PyErr_Format(PyExc_NotImplementedError,
"rank(y) != 2. rank(y) is %%d.", PyArray_NDIM(%(_y)s));
%(fail)s;
}
if (%(_zout)s && PyArray_NDIM(%(_zout)s) != 2) {
PyErr_Format(PyExc_NotImplementedError,
"rank(z) != 2. rank(z) is %%d.", PyArray_NDIM(%(_zout)s));
%(fail)s;
}
""" """
check_xyz_double_or_float = """ check_xyz_double_or_float = """
if ((%(_x)s->descr->type_num != PyArray_DOUBLE) if ((PyArray_DESCR(%(_x)s)->type_num != NPY_DOUBLE)
&& (%(_x)s->descr->type_num != PyArray_FLOAT)) && (PyArray_DESCR(%(_x)s)->type_num != NPY_FLOAT))
{PyErr_SetString(PyExc_NotImplementedError, "type(x) is not double or float"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "type(x) is not double or float"); %(fail)s;}
if ((%(_y)s->descr->type_num != PyArray_DOUBLE) if ((PyArray_DESCR(%(_y)s)->type_num != NPY_DOUBLE)
&& (%(_y)s->descr->type_num != PyArray_FLOAT)) && (PyArray_DESCR(%(_y)s)->type_num != NPY_FLOAT))
{PyErr_SetString(PyExc_NotImplementedError, "type(y) is not double or float"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "type(y) is not double or float"); %(fail)s;}
if ((%(_zout)s->descr->type_num != PyArray_DOUBLE) if ((PyArray_DESCR(%(_zout)s)->type_num != NPY_DOUBLE)
&& (%(_zout)s->descr->type_num != PyArray_FLOAT)) && (PyArray_DESCR(%(_zout)s)->type_num != NPY_FLOAT))
{PyErr_SetString(PyExc_NotImplementedError, "type(z) is not double or float"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "type(z) is not double or float"); %(fail)s;}
if ((%(_x)s->descr->type_num != %(_y)s->descr->type_num) if ((PyArray_DESCR(%(_x)s)->type_num != PyArray_DESCR(%(_y)s)->type_num)
||(%(_x)s->descr->type_num != %(_zout)s->descr->type_num)) ||(PyArray_DESCR(%(_x)s)->type_num != PyArray_DESCR(%(_zout)s)->type_num))
{ PyErr_SetString(PyExc_NotImplementedError, "type(x), type(y), type(z) are not all the same"); %(fail)s; } { PyErr_SetString(PyExc_NotImplementedError, "type(x), type(y), type(z) are not all the same"); %(fail)s; }
""" """
#it is not necessary that a or b have the same type as x,y,z #it is not necessary that a or b have the same type as x,y,z
check_ab_double_or_float = """ check_ab_double_or_float = """
if ((%(_a)s->descr->type_num != PyArray_DOUBLE) if ((PyArray_DESCR(%(_a)s)->type_num != NPY_DOUBLE)
&& (%(_a)s->descr->type_num != PyArray_FLOAT)) && (PyArray_DESCR(%(_a)s)->type_num != NPY_FLOAT))
{PyErr_SetString(PyExc_NotImplementedError, "type(a) is not double or float"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "type(a) is not double or float"); %(fail)s;}
if ((%(_b)s->descr->type_num != PyArray_DOUBLE) if ((PyArray_DESCR(%(_b)s)->type_num != NPY_DOUBLE)
&& (%(_b)s->descr->type_num != PyArray_FLOAT)) && (PyArray_DESCR(%(_b)s)->type_num != NPY_FLOAT))
{PyErr_SetString(PyExc_NotImplementedError, "type(b) is not double or float"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "type(b) is not double or float"); %(fail)s;}
""" """
...@@ -587,7 +597,7 @@ class GemmRelated(Op): ...@@ -587,7 +597,7 @@ class GemmRelated(Op):
%(fail)s %(fail)s
Py_XDECREF(%(_x)s); Py_XDECREF(%(_x)s);
%(_x)s = _x_copy; %(_x)s = _x_copy;
Sx = %(_x)s->strides; Sx = PyArray_STRIDES(%(_x)s);
} }
if ((Sy[0] < 1) || (Sy[1] < 1) || (Sy[0] MOD type_size) || (Sy[1] MOD type_size) if ((Sy[0] < 1) || (Sy[1] < 1) || (Sy[0] MOD type_size) || (Sy[1] MOD type_size)
...@@ -598,7 +608,7 @@ class GemmRelated(Op): ...@@ -598,7 +608,7 @@ class GemmRelated(Op):
%(fail)s %(fail)s
Py_XDECREF(%(_y)s); Py_XDECREF(%(_y)s);
%(_y)s = _y_copy; %(_y)s = _y_copy;
Sy = %(_y)s->strides; Sy = PyArray_STRIDES(%(_y)s);
} }
if ((Sz[0] < 1) || (Sz[1] < 1) || (Sz[0] MOD type_size) || (Sz[1] MOD type_size) if ((Sz[0] < 1) || (Sz[1] < 1) || (Sz[0] MOD type_size) || (Sz[1] MOD type_size)
...@@ -609,7 +619,7 @@ class GemmRelated(Op): ...@@ -609,7 +619,7 @@ class GemmRelated(Op):
%(fail)s %(fail)s
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
%(_zout)s = _z_copy; %(_zout)s = _z_copy;
Sz = %(_zout)s->strides; Sz = PyArray_STRIDES(%(_zout)s);
} }
""" """
...@@ -644,7 +654,7 @@ class GemmRelated(Op): ...@@ -644,7 +654,7 @@ class GemmRelated(Op):
""" """
case_float = """ case_float = """
case PyArray_FLOAT: case NPY_FLOAT:
{ {
""" """
...@@ -677,7 +687,7 @@ class GemmRelated(Op): ...@@ -677,7 +687,7 @@ class GemmRelated(Op):
case_double = """ case_double = """
} }
break; break;
case PyArray_DOUBLE: case NPY_DOUBLE:
{ {
""" """
...@@ -878,25 +888,25 @@ class Gemm(GemmRelated): ...@@ -878,25 +888,25 @@ class Gemm(GemmRelated):
%(_zout)s = %(_z)s; %(_zout)s = %(_z)s;
Py_INCREF(%(_zout)s); Py_INCREF(%(_zout)s);
} }
Nz = %(_z)s->dimensions; Nz = PyArray_DIMS(%(_z)s);
Sz = %(_z)s->strides; Sz = PyArray_STRIDES(%(_z)s);
""" """
setup_z_Nz_Sz_outplace = """ setup_z_Nz_Sz_outplace = """
if ((NULL == %(_zout)s) if ((NULL == %(_zout)s)
|| (%(_zout)s->dimensions[0] != %(_z)s->dimensions[0]) || (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_z)s)[0])
|| (%(_zout)s->dimensions[1] != %(_z)s->dimensions[1]) || (PyArray_DIMS(%(_zout)s)[1] != PyArray_DIMS(%(_z)s)[1])
|| (%(_zout)s->strides[0] <= 0) || (PyArray_STRIDES(%(_zout)s)[0] <= 0)
|| (%(_zout)s->strides[1] <= 0) || (PyArray_STRIDES(%(_zout)s)[1] <= 0)
|| (%(_zout)s->strides[0] MOD type_size) || (PyArray_STRIDES(%(_zout)s)[0] MOD type_size)
|| (%(_zout)s->strides[1] MOD type_size) || (PyArray_STRIDES(%(_zout)s)[1] MOD type_size)
|| ((%(_zout)s->strides[0] != type_size) || ((PyArray_STRIDES(%(_zout)s)[0] != type_size)
&& (%(_zout)s->strides[1] != type_size))) && (PyArray_STRIDES(%(_zout)s)[1] != type_size)))
{ {
Py_XDECREF(%(_zout)s); Py_XDECREF(%(_zout)s);
npy_intp dims[2]; npy_intp dims[2];
dims[0] = %(_z)s->dimensions[0]; dims[0] = PyArray_DIMS(%(_z)s)[0];
dims[1] = %(_z)s->dimensions[1]; dims[1] = PyArray_DIMS(%(_z)s)[1];
%(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims, %(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims,
type_num_%(_z)s); type_num_%(_z)s);
//fprintf(stderr, "Gemm Allocating %%i %%i\\n", dims[0], dims[1]); //fprintf(stderr, "Gemm Allocating %%i %%i\\n", dims[0], dims[1]);
...@@ -906,17 +916,17 @@ class Gemm(GemmRelated): ...@@ -906,17 +916,17 @@ class Gemm(GemmRelated):
%(fail)s %(fail)s
} }
} }
Nz = %(_zout)s->dimensions; Nz = PyArray_DIMS(%(_zout)s);
Sz = %(_zout)s->strides; Sz = PyArray_STRIDES(%(_zout)s);
if (%(_zout)s->descr->type_num == PyArray_FLOAT) if (PyArray_DESCR(%(_zout)s)->type_num == NPY_FLOAT)
{ {
float * zoutdata = (float*)%(_zout)s->data; float * zoutdata = (float*)PyArray_DATA(%(_zout)s);
int zoi = Sz[0] / sizeof(float); int zoi = Sz[0] / sizeof(float);
int zoj = Sz[1] / sizeof(float); int zoj = Sz[1] / sizeof(float);
const float * zdata = (float*)%(_z)s->data; const float * zdata = (float*)PyArray_DATA(%(_z)s);
int zi = %(_z)s->strides[0]/sizeof(float); int zi = PyArray_STRIDES(%(_z)s)[0]/sizeof(float);
int zj = %(_z)s->strides[1]/sizeof(float); int zj = PyArray_STRIDES(%(_z)s)[1]/sizeof(float);
for (int i = 0; i < Nz[0]; ++i) for (int i = 0; i < Nz[0]; ++i)
{ {
for (int j = 0; j < Nz[1]; ++j) for (int j = 0; j < Nz[1]; ++j)
...@@ -925,14 +935,14 @@ class Gemm(GemmRelated): ...@@ -925,14 +935,14 @@ class Gemm(GemmRelated):
} }
} }
} }
else if (%(_zout)s->descr->type_num == PyArray_DOUBLE) else if (PyArray_DESCR(%(_zout)s)->type_num == NPY_DOUBLE)
{ {
double * zoutdata = (double*) %(_zout)s->data; double * zoutdata = (double*) PyArray_DATA(%(_zout)s);
int zoi = Sz[0] / sizeof(double); int zoi = Sz[0] / sizeof(double);
int zoj = Sz[1] / sizeof(double); int zoj = Sz[1] / sizeof(double);
const double * zdata = (double*)%(_z)s->data; const double * zdata = (double*)PyArray_DATA(%(_z)s);
int zi = %(_z)s->strides[0]/sizeof(double); int zi = PyArray_STRIDES(%(_z)s)[0]/sizeof(double);
int zj = %(_z)s->strides[1]/sizeof(double); int zj = PyArray_STRIDES(%(_z)s)[1]/sizeof(double);
for (int i = 0; i < Nz[0]; ++i) for (int i = 0; i < Nz[0]; ++i)
{ {
for (int j = 0; j < Nz[1]; ++j) for (int j = 0; j < Nz[1]; ++j)
...@@ -951,22 +961,22 @@ class Gemm(GemmRelated): ...@@ -951,22 +961,22 @@ class Gemm(GemmRelated):
case_float_ab_constants = """ case_float_ab_constants = """
#define REAL float #define REAL float
float a = (%(_a)s->descr->type_num == PyArray_FLOAT) float a = (PyArray_DESCR(%(_a)s)->type_num == NPY_FLOAT)
? (REAL)(((float*)%(_a)s->data)[0]) ? (REAL)(((float*)PyArray_DATA(%(_a)s))[0])
: (REAL)(((double*)%(_a)s->data)[0]); : (REAL)(((double*)PyArray_DATA(%(_a)s))[0]);
float b = (%(_b)s->descr->type_num == PyArray_FLOAT) ? float b = (PyArray_DESCR(%(_b)s)->type_num == NPY_FLOAT) ?
(REAL)(((float*)%(_b)s->data)[0]) (REAL)(((float*)PyArray_DATA(%(_b)s))[0])
: (REAL)(((double*)%(_b)s->data)[0]); : (REAL)(((double*)PyArray_DATA(%(_b)s))[0]);
#undef REAL #undef REAL
""" """
case_double_ab_constants = """ case_double_ab_constants = """
#define REAL double #define REAL double
double a = (%(_a)s->descr->type_num == PyArray_FLOAT) double a = (PyArray_DESCR(%(_a)s)->type_num == NPY_FLOAT)
? (REAL)(((float*)%(_a)s->data)[0]) ? (REAL)(((float*)PyArray_DATA(%(_a)s))[0])
: (REAL)(((double*)%(_a)s->data)[0]); : (REAL)(((double*)PyArray_DATA(%(_a)s))[0]);
double b = (%(_b)s->descr->type_num == PyArray_FLOAT) ? double b = (PyArray_DESCR(%(_b)s)->type_num == NPY_FLOAT) ?
(REAL)(((float*)%(_b)s->data)[0]) (REAL)(((float*)PyArray_DATA(%(_b)s))[0])
: (REAL)(((double*)%(_b)s->data)[0]); : (REAL)(((double*)PyArray_DATA(%(_b)s))[0]);
#undef REAL #undef REAL
""" """
...@@ -1469,13 +1479,13 @@ class Dot22(GemmRelated): ...@@ -1469,13 +1479,13 @@ class Dot22(GemmRelated):
setup_z_Nz_Sz = """ setup_z_Nz_Sz = """
if ((NULL == %(_zout)s) if ((NULL == %(_zout)s)
|| (%(_zout)s->dimensions[0] != %(_x)s->dimensions[0]) || (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_x)s)[0])
|| (%(_zout)s->dimensions[1] != %(_y)s->dimensions[1])) || (PyArray_DIMS(%(_zout)s)[1] != PyArray_DIMS(%(_y)s)[1]))
{ {
if (NULL != %(_zout)s) Py_XDECREF(%(_zout)s); if (NULL != %(_zout)s) Py_XDECREF(%(_zout)s);
npy_intp dims[2]; npy_intp dims[2];
dims[0] = %(_x)s->dimensions[0]; dims[0] = PyArray_DIMS(%(_x)s)[0];
dims[1] = %(_y)s->dimensions[1]; dims[1] = PyArray_DIMS(%(_y)s)[1];
%(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims, %(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims,
type_num_%(_x)s); type_num_%(_x)s);
//fprintf(stderr, "Dot Allocating %%i %%i\\n", dims[0], dims[1]); //fprintf(stderr, "Dot Allocating %%i %%i\\n", dims[0], dims[1]);
...@@ -1485,8 +1495,8 @@ class Dot22(GemmRelated): ...@@ -1485,8 +1495,8 @@ class Dot22(GemmRelated):
%(fail)s %(fail)s
} }
} }
Nz = %(_zout)s->dimensions; Nz = PyArray_DIMS(%(_zout)s);
Sz = %(_zout)s->strides; Sz = PyArray_STRIDES(%(_zout)s);
""" """
check_ab_double_or_float = "" check_ab_double_or_float = ""
...@@ -1743,26 +1753,26 @@ class Dot22Scalar(GemmRelated): ...@@ -1743,26 +1753,26 @@ class Dot22Scalar(GemmRelated):
setup_z_Nz_Sz = Dot22.setup_z_Nz_Sz setup_z_Nz_Sz = Dot22.setup_z_Nz_Sz
check_ab_double_or_float = """ check_ab_double_or_float = """
if ((%(_a)s->descr->type_num != PyArray_DOUBLE) if ((PyArray_DESCR(%(_a)s)->type_num != NPY_DOUBLE)
&& (%(_a)s->descr->type_num != PyArray_FLOAT)) && (PyArray_DESCR(%(_a)s)->type_num != NPY_FLOAT))
{PyErr_SetString(PyExc_NotImplementedError, {PyErr_SetString(PyExc_NotImplementedError,
"type(a) is not double or float"); %(fail)s;} "type(a) is not double or float"); %(fail)s;}
""" """
case_float_ab_constants = """ case_float_ab_constants = """
#define REAL float #define REAL float
float a = (%(_a)s->descr->type_num == PyArray_FLOAT) float a = (PyArray_DESCR(%(_a)s)->type_num == NPY_FLOAT)
? (REAL)(((float*)%(_a)s->data)[0]) ? (REAL)(((float*)PyArray_DATA(%(_a)s))[0])
: (REAL)(((double*)%(_a)s->data)[0]); : (REAL)(((double*)PyArray_DATA(%(_a)s))[0]);
#undef REAL #undef REAL
float b = 0.0; float b = 0.0;
""" """
case_double_ab_constants = """ case_double_ab_constants = """
#define REAL double #define REAL double
double a = (%(_a)s->descr->type_num == PyArray_FLOAT) double a = (PyArray_DESCR(%(_a)s)->type_num == NPY_FLOAT)
? (REAL)(((float*)%(_a)s->data)[0]) ? (REAL)(((float*)PyArray_DATA(%(_a)s))[0])
: (REAL)(((double*)%(_a)s->data)[0]); : (REAL)(((double*)PyArray_DATA(%(_a)s))[0]);
#undef REAL #undef REAL
double b = 0.0; double b = 0.0;
""" """
......
...@@ -33,35 +33,35 @@ def ger_c_code(A, a, x, y, Z, destructive, fail): ...@@ -33,35 +33,35 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
int elemsize ; int elemsize ;
if (%(A)s->nd != 2) if (PyArray_NDIM(%(A)s) != 2)
{PyErr_SetString(PyExc_NotImplementedError, "rank(A) != 2"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "rank(A) != 2"); %(fail)s;}
if (%(x)s->nd != 1) if (PyArray_NDIM(%(x)s) != 1)
{PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 1"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 1"); %(fail)s;}
if (%(y)s->nd != 1) if (PyArray_NDIM(%(y)s) != 1)
{PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 1"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 1"); %(fail)s;}
if (%(a)s->nd != 0) if (PyArray_NDIM(%(a)s) != 0)
{PyErr_SetString(PyExc_NotImplementedError, "rank(a) != 0"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "rank(a) != 0"); %(fail)s;}
if (%(A)s->descr->type_num != %(x)s->descr->type_num) if (PyArray_DESCR(%(A)s)->type_num != PyArray_DESCR(%(x)s)->type_num)
{ PyErr_SetString(PyExc_TypeError, "A vs. x"); %(fail)s; } { PyErr_SetString(PyExc_TypeError, "A vs. x"); %(fail)s; }
if (%(A)s->descr->type_num != %(y)s->descr->type_num) if (PyArray_DESCR(%(A)s)->type_num != PyArray_DESCR(%(y)s)->type_num)
{ PyErr_SetString(PyExc_TypeError, "A vs. y"); %(fail)s; } { PyErr_SetString(PyExc_TypeError, "A vs. y"); %(fail)s; }
if (%(A)s->dimensions[0] != %(x)s->dimensions[0]) if (PyArray_DIMS(%(A)s)[0] != PyArray_DIMS(%(x)s)[0])
{ {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"Shape mismatch: A.shape[0] != x.shape[0]"); "Shape mismatch: A.shape[0] != x.shape[0]");
%(fail)s; %(fail)s;
} }
if (%(A)s->dimensions[1] != %(y)s->dimensions[0]) if (PyArray_DIMS(%(A)s)[1] != PyArray_DIMS(%(y)s)[0])
{ {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"Shape mismatch: A.shape[1] != y.shape[0]"); "Shape mismatch: A.shape[1] != y.shape[0]");
%(fail)s; %(fail)s;
} }
if (%(A)s->descr->type_num == PyArray_DOUBLE) { elemsize = 8; } if (PyArray_DESCR(%(A)s)->type_num == NPY_DOUBLE) { elemsize = 8; }
else if (%(A)s->descr->type_num == PyArray_FLOAT) { elemsize = 4;} else if (PyArray_DESCR(%(A)s)->type_num == NPY_FLOAT) { elemsize = 4;}
else else
{ {
PyErr_SetString(PyExc_NotImplementedError, "complex CGer"); PyErr_SetString(PyExc_NotImplementedError, "complex CGer");
...@@ -70,22 +70,22 @@ def ger_c_code(A, a, x, y, Z, destructive, fail): ...@@ -70,22 +70,22 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
// copy A if !self.destructive or A is fully strided // copy A if !self.destructive or A is fully strided
if (!%(destructive)s if (!%(destructive)s
|| (%(A)s->strides[0] < 0) || (PyArray_STRIDES(%(A)s)[0] < 0)
|| (%(A)s->strides[1] < 0) || (PyArray_STRIDES(%(A)s)[1] < 0)
|| ((%(A)s->strides[0] != elemsize) || ((PyArray_STRIDES(%(A)s)[0] != elemsize)
&& (%(A)s->strides[1] != elemsize))) && (PyArray_STRIDES(%(A)s)[1] != elemsize)))
{ {
npy_intp dims[2]; npy_intp dims[2];
dims[0] = %(A)s->dimensions[0]; dims[0] = PyArray_DIMS(%(A)s)[0];
dims[1] = %(A)s->dimensions[1]; dims[1] = PyArray_DIMS(%(A)s)[1];
if ((NULL == %(Z)s) if ((NULL == %(Z)s)
|| (%(Z)s->dimensions[0] != %(A)s->dimensions[0]) || (PyArray_DIMS(%(Z)s)[0] != PyArray_DIMS(%(A)s)[0])
|| (%(Z)s->dimensions[1] != %(A)s->dimensions[1]) || (PyArray_DIMS(%(Z)s)[1] != PyArray_DIMS(%(A)s)[1])
|| (%(Z)s->strides[0] < 0) || (PyArray_STRIDES(%(Z)s)[0] < 0)
|| (%(Z)s->strides[1] < 0) || (PyArray_STRIDES(%(Z)s)[1] < 0)
|| ((%(Z)s->strides[0] != elemsize) || ((PyArray_STRIDES(%(Z)s)[0] != elemsize)
&& (%(Z)s->strides[1] != elemsize))) && (PyArray_STRIDES(%(Z)s)[1] != elemsize)))
{ {
if (%(Z)s) Py_XDECREF(%(Z)s); if (%(Z)s) Py_XDECREF(%(Z)s);
%(Z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(Z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims,
...@@ -101,14 +101,14 @@ def ger_c_code(A, a, x, y, Z, destructive, fail): ...@@ -101,14 +101,14 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
PyErr_SetString(PyExc_AssertionError, "%(Z)s != %(A)s"); PyErr_SetString(PyExc_AssertionError, "%(Z)s != %(A)s");
%(fail)s %(fail)s
} }
if (%(Z)s->descr->type_num == PyArray_FLOAT) if (PyArray_DESCR(%(Z)s)->type_num == NPY_FLOAT)
{ {
float * zoutdata = (float*)%(Z)s->data; float * zoutdata = (float*)PyArray_DATA(%(Z)s);
const float * zdata = (float*)%(A)s->data; const float * zdata = (float*)PyArray_DATA(%(A)s);
int Ai = %(A)s->strides[0]/sizeof(float); int Ai = PyArray_STRIDES(%(A)s)[0]/sizeof(float);
int Aj = %(A)s->strides[1]/sizeof(float); int Aj = PyArray_STRIDES(%(A)s)[1]/sizeof(float);
int Zi = %(Z)s->strides[0]/sizeof(float); int Zi = PyArray_STRIDES(%(Z)s)[0]/sizeof(float);
int Zj = %(Z)s->strides[1]/sizeof(float); int Zj = PyArray_STRIDES(%(Z)s)[1]/sizeof(float);
for (int i = 0; i < dims[0]; ++i) for (int i = 0; i < dims[0]; ++i)
{ {
for (int j = 0; j < dims[1]; ++j) for (int j = 0; j < dims[1]; ++j)
...@@ -117,14 +117,14 @@ def ger_c_code(A, a, x, y, Z, destructive, fail): ...@@ -117,14 +117,14 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
} }
} }
} }
else if (%(Z)s->descr->type_num == PyArray_DOUBLE) else if (PyArray_DESCR(%(Z)s)->type_num == NPY_DOUBLE)
{ {
double * zoutdata = (double*) %(Z)s->data; double * zoutdata = (double*) PyArray_DATA(%(Z)s);
const double * zdata = (double*)%(A)s->data; const double * zdata = (double*)PyArray_DATA(%(A)s);
int Ai = %(A)s->strides[0]/sizeof(double); int Ai = PyArray_STRIDES(%(A)s)[0]/sizeof(double);
int Aj = %(A)s->strides[1]/sizeof(double); int Aj = PyArray_STRIDES(%(A)s)[1]/sizeof(double);
int Zi = %(Z)s->strides[0]/sizeof(double); int Zi = PyArray_STRIDES(%(Z)s)[0]/sizeof(double);
int Zj = %(Z)s->strides[1]/sizeof(double); int Zj = PyArray_STRIDES(%(Z)s)[1]/sizeof(double);
for (int i = 0; i < dims[0]; ++i) for (int i = 0; i < dims[0]; ++i)
{ {
for (int j = 0; j < dims[1]; ++j) for (int j = 0; j < dims[1]; ++j)
...@@ -152,10 +152,10 @@ def ger_c_code(A, a, x, y, Z, destructive, fail): ...@@ -152,10 +152,10 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
} }
{ {
int Nz0 = %(Z)s->dimensions[0]; int Nz0 = PyArray_DIMS(%(Z)s)[0];
int Nz1 = %(Z)s->dimensions[1]; int Nz1 = PyArray_DIMS(%(Z)s)[1];
int Sx = %(x)s->strides[0] / elemsize; int Sx = PyArray_STRIDES(%(x)s)[0] / elemsize;
int Sy = %(y)s->strides[0] / elemsize; int Sy = PyArray_STRIDES(%(y)s)[0] / elemsize;
/* create appropriate strides for Z, if it is a row or column matrix. /* create appropriate strides for Z, if it is a row or column matrix.
* In that case, the value of the stride does not really matter, but * In that case, the value of the stride does not really matter, but
...@@ -163,11 +163,11 @@ def ger_c_code(A, a, x, y, Z, destructive, fail): ...@@ -163,11 +163,11 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
* - they are not smaller than the number of elements in the array, * - they are not smaller than the number of elements in the array,
* - they are not 0. * - they are not 0.
*/ */
int Sz0 = (Nz0 > 1) ? (%(Z)s->strides[0] / elemsize) : (Nz1 + 1); int Sz0 = (Nz0 > 1) ? (PyArray_STRIDES(%(Z)s)[0] / elemsize) : (Nz1 + 1);
int Sz1 = (Nz1 > 1) ? (%(Z)s->strides[1] / elemsize) : (Nz0 + 1); int Sz1 = (Nz1 > 1) ? (PyArray_STRIDES(%(Z)s)[1] / elemsize) : (Nz0 + 1);
dtype_%(x)s* x_data = (dtype_%(x)s*) %(x)s->data; dtype_%(x)s* x_data = (dtype_%(x)s*) PyArray_DATA(%(x)s);
dtype_%(y)s* y_data = (dtype_%(y)s*) %(y)s->data; dtype_%(y)s* y_data = (dtype_%(y)s*) PyArray_DATA(%(y)s);
// gemv expects pointers to the beginning of memory arrays, // gemv expects pointers to the beginning of memory arrays,
// but numpy provides provides a pointer to the first element, // but numpy provides provides a pointer to the first element,
// so when the stride is negative, we need to get the last one. // so when the stride is negative, we need to get the last one.
...@@ -176,24 +176,24 @@ def ger_c_code(A, a, x, y, Z, destructive, fail): ...@@ -176,24 +176,24 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
if (Sy < 0) if (Sy < 0)
y_data += (Nz1 - 1) * Sy; y_data += (Nz1 - 1) * Sy;
if (%(Z)s->strides[0] == elemsize) if (PyArray_STRIDES(%(Z)s)[0] == elemsize)
{ {
if (%(Z)s->descr->type_num == PyArray_FLOAT) if (PyArray_DESCR(%(Z)s)->type_num == NPY_FLOAT)
{ {
//fprintf(stderr, "A\\n"); //fprintf(stderr, "A\\n");
float alpha = ((dtype_%(a)s*)%(a)s->data)[0]; float alpha = ((dtype_%(a)s*)PyArray_DATA(%(a)s))[0];
sger_(&Nz0, &Nz1, &alpha, sger_(&Nz0, &Nz1, &alpha,
(float*)x_data, &Sx, (float*)x_data, &Sx,
(float*)y_data, &Sy, (float*)y_data, &Sy,
(float*)(%(Z)s->data), &Sz1); (float*)(PyArray_DATA(%(Z)s)), &Sz1);
} }
else if (%(Z)s->descr->type_num == PyArray_DOUBLE) else if (PyArray_DESCR(%(Z)s)->type_num == NPY_DOUBLE)
{ {
double alpha = ((dtype_%(a)s*)%(a)s->data)[0]; double alpha = ((dtype_%(a)s*)PyArray_DATA(%(a)s))[0];
dger_(&Nz0, &Nz1, &alpha, dger_(&Nz0, &Nz1, &alpha,
(double*)x_data, &Sx, (double*)x_data, &Sx,
(double*)y_data, &Sy, (double*)y_data, &Sy,
(double*)(%(Z)s->data), &Sz1); (double*)(PyArray_DATA(%(Z)s)), &Sz1);
} }
else { else {
PyErr_SetString(PyExc_NotImplementedError, PyErr_SetString(PyExc_NotImplementedError,
...@@ -201,26 +201,26 @@ def ger_c_code(A, a, x, y, Z, destructive, fail): ...@@ -201,26 +201,26 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
%(fail)s %(fail)s
} }
} }
else if (%(Z)s->strides[1] == elemsize) else if (PyArray_STRIDES(%(Z)s)[1] == elemsize)
{ {
if (%(Z)s->descr->type_num == PyArray_FLOAT) if (PyArray_DESCR(%(Z)s)->type_num == NPY_FLOAT)
{ {
//fprintf(stderr, "B %%i %%i %%i %%i\\n", Nz0, Nz1, Sz0, Sz1); //fprintf(stderr, "B %%i %%i %%i %%i\\n", Nz0, Nz1, Sz0, Sz1);
float alpha = ((dtype_%(a)s*)(%(a)s->data))[0]; float alpha = ((dtype_%(a)s*)(PyArray_DATA(%(a)s)))[0];
//fprintf(stderr, "alpha=%%f\\n", alpha); //fprintf(stderr, "alpha=%%f\\n", alpha);
//fprintf(stderr, "sx sy %%i %%i\\n", Sx, Sy); //fprintf(stderr, "sx sy %%i %%i\\n", Sx, Sy);
sger_(&Nz1, &Nz0, &alpha, sger_(&Nz1, &Nz0, &alpha,
(float*)y_data, &Sy, (float*)y_data, &Sy,
(float*)x_data, &Sx, (float*)x_data, &Sx,
(float*)(%(Z)s->data), &Sz0); (float*)(PyArray_DATA(%(Z)s)), &Sz0);
} }
else if (%(Z)s->descr->type_num == PyArray_DOUBLE) else if (PyArray_DESCR(%(Z)s)->type_num == NPY_DOUBLE)
{ {
double alpha = ((dtype_%(a)s*)%(a)s->data)[0]; double alpha = ((dtype_%(a)s*)PyArray_DATA(%(a)s))[0];
dger_(&Nz1, &Nz0, &alpha, dger_(&Nz1, &Nz0, &alpha,
(double*)y_data, &Sy, (double*)y_data, &Sy,
(double*)x_data, &Sx, (double*)x_data, &Sx,
(double*)(%(Z)s->data), &Sz0); (double*)(PyArray_DATA(%(Z)s)), &Sz0);
} }
else else
{ {
...@@ -290,68 +290,68 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail): ...@@ -290,68 +290,68 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
float fbeta; float fbeta;
double dbeta; double dbeta;
if (%(aa)s->nd != 1) if (PyArray_NDIM(%(aa)s) != 1)
{ {
PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(aa) != 1"); PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(aa) != 1");
%(fail)s; %(fail)s;
} }
if (%(xx)s->nd != 2) if (PyArray_NDIM(%(xx)s) != 2)
{ {
PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(xx) != 2"); PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(xx) != 2");
%(fail)s; %(fail)s;
} }
if (%(yy)s->nd != 1) if (PyArray_NDIM(%(yy)s) != 1)
{ {
PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(yy) != 1"); PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(yy) != 1");
%(fail)s; %(fail)s;
} }
if (%(alpha)s->nd != 0) if (PyArray_NDIM(%(alpha)s) != 0)
{ {
PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(alpha) != 0"); PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(alpha) != 0");
%(fail)s; %(fail)s;
} }
if (%(beta)s->nd != 0) if (PyArray_NDIM(%(beta)s) != 0)
{ {
PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(beta) != 0"); PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(beta) != 0");
%(fail)s; %(fail)s;
} }
if (%(aa)s->descr->type_num != %(xx)s->descr->type_num) if (PyArray_DESCR(%(aa)s)->type_num != PyArray_DESCR(%(xx)s)->type_num)
{ PyErr_SetString(PyExc_TypeError, "Gemv: aa vs. xx"); %(fail)s; } { PyErr_SetString(PyExc_TypeError, "Gemv: aa vs. xx"); %(fail)s; }
if (%(aa)s->descr->type_num != %(yy)s->descr->type_num) if (PyArray_DESCR(%(aa)s)->type_num != PyArray_DESCR(%(yy)s)->type_num)
{ PyErr_SetString(PyExc_TypeError, "Gemv: aa vs. yy"); %(fail)s; } { PyErr_SetString(PyExc_TypeError, "Gemv: aa vs. yy"); %(fail)s; }
if (%(xx)s->dimensions[0] != %(aa)s->dimensions[0]) if (PyArray_DIMS(%(xx)s)[0] != PyArray_DIMS(%(aa)s)[0])
{ {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"Shape mismatch: A.shape[0] != x.shape[0]"); "Shape mismatch: A.shape[0] != x.shape[0]");
%(fail)s; %(fail)s;
} }
if (%(xx)s->dimensions[1] != %(yy)s->dimensions[0]) if (PyArray_DIMS(%(xx)s)[1] != PyArray_DIMS(%(yy)s)[0])
{ {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"Shape mismatch: A.shape[1] != y.shape[0]"); "Shape mismatch: A.shape[1] != y.shape[0]");
%(fail)s; %(fail)s;
} }
if (%(aa)s->descr->type_num == PyArray_DOUBLE) { elemsize = 8; } if (PyArray_DESCR(%(aa)s)->type_num == NPY_DOUBLE) { elemsize = 8; }
else if (%(aa)s->descr->type_num == PyArray_FLOAT) { elemsize = 4;} else if (PyArray_DESCR(%(aa)s)->type_num == NPY_FLOAT) { elemsize = 4;}
else { else {
PyErr_SetString(PyExc_NotImplementedError, "complex Gemv"); PyErr_SetString(PyExc_NotImplementedError, "complex Gemv");
%(fail)s; %(fail)s;
} }
fbeta = dbeta = ((dtype_%(beta)s*)%(beta)s->data)[0]; fbeta = dbeta = ((dtype_%(beta)s*)PyArray_DATA(%(beta)s))[0];
// copy aa if not destructive // copy aa if not destructive
if (!%(destructive)s) if (!%(destructive)s)
{ {
if ((NULL == %(zz)s) if ((NULL == %(zz)s)
|| (%(zz)s->dimensions[0] != %(aa)s->dimensions[0])) || (PyArray_DIMS(%(zz)s)[0] != PyArray_DIMS(%(aa)s)[0]))
{ {
if (%(zz)s) Py_XDECREF(%(zz)s); if (%(zz)s) Py_XDECREF(%(zz)s);
%(zz)s = (PyArrayObject*)PyArray_SimpleNew(1, %(zz)s = (PyArrayObject*)PyArray_SimpleNew(1,
%(aa)s->dimensions, type_num_%(aa)s); PyArray_DIMS(%(aa)s), type_num_%(aa)s);
if(!%(zz)s) { if(!%(zz)s) {
PyErr_SetString(PyExc_MemoryError, PyErr_SetString(PyExc_MemoryError,
"failed to alloc gemv output"); "failed to alloc gemv output");
...@@ -365,24 +365,24 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail): ...@@ -365,24 +365,24 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
} }
if (dbeta != 0) if (dbeta != 0)
{ {
if (%(zz)s->descr->type_num == PyArray_FLOAT) if (PyArray_DESCR(%(zz)s)->type_num == NPY_FLOAT)
{ {
float * zoutdata = (float*)%(zz)s->data; float * zoutdata = (float*)PyArray_DATA(%(zz)s);
const float * zdata = (float*)%(aa)s->data; const float * zdata = (float*)PyArray_DATA(%(aa)s);
int Ai = %(aa)s->strides[0]/sizeof(float); int Ai = PyArray_STRIDES(%(aa)s)[0]/sizeof(float);
int Zi = %(zz)s->strides[0]/sizeof(float); int Zi = PyArray_STRIDES(%(zz)s)[0]/sizeof(float);
for (int i = 0; i < %(aa)s->dimensions[0]; ++i) for (int i = 0; i < PyArray_DIMS(%(aa)s)[0]; ++i)
{ {
zoutdata[Zi*i] = fbeta * zdata[Ai*i]; zoutdata[Zi*i] = fbeta * zdata[Ai*i];
} }
} }
else if (%(xx)s->descr->type_num == PyArray_DOUBLE) else if (PyArray_DESCR(%(xx)s)->type_num == NPY_DOUBLE)
{ {
double * zoutdata = (double*) %(zz)s->data; double * zoutdata = (double*) PyArray_DATA(%(zz)s);
const double * zdata = (double*)%(aa)s->data; const double * zdata = (double*)PyArray_DATA(%(aa)s);
int Ai = %(aa)s->strides[0]/sizeof(double); int Ai = PyArray_STRIDES(%(aa)s)[0]/sizeof(double);
int Zi = %(zz)s->strides[0]/sizeof(double); int Zi = PyArray_STRIDES(%(zz)s)[0]/sizeof(double);
for (int i = 0; i < %(aa)s->dimensions[0]; ++i) for (int i = 0; i < PyArray_DIMS(%(aa)s)[0]; ++i)
{ {
zoutdata[Zi*i] = dbeta * zdata[Ai*i]; zoutdata[Zi*i] = dbeta * zdata[Ai*i];
} }
...@@ -409,20 +409,20 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail): ...@@ -409,20 +409,20 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
{ {
char TRANS = 'T'; char TRANS = 'T';
char NOTRANS = 'N'; char NOTRANS = 'N';
int Nx0 = %(xx)s->dimensions[0]; int Nx0 = PyArray_DIMS(%(xx)s)[0];
int Nx1 = %(xx)s->dimensions[1]; int Nx1 = PyArray_DIMS(%(xx)s)[1];
/* This formula is needed in the case where xx is actually a row or /* This formula is needed in the case where xx is actually a row or
* column matrix, because BLAS sometimes insists that the strides: * column matrix, because BLAS sometimes insists that the strides:
* - are not smaller than the number of elements in the array * - are not smaller than the number of elements in the array
* - are not 0. * - are not 0.
*/ */
int Sx0 = (Nx0 > 1) ? (%(xx)s->strides[0] / elemsize) : (Nx1 + 1); int Sx0 = (Nx0 > 1) ? (PyArray_STRIDES(%(xx)s)[0] / elemsize) : (Nx1 + 1);
int Sx1 = (Nx1 > 1) ? (%(xx)s->strides[1] / elemsize) : (Nx0 + 1); int Sx1 = (Nx1 > 1) ? (PyArray_STRIDES(%(xx)s)[1] / elemsize) : (Nx0 + 1);
int Sz = %(zz)s->strides[0] / elemsize; int Sz = PyArray_STRIDES(%(zz)s)[0] / elemsize;
int Sy = %(yy)s->strides[0] / elemsize; int Sy = PyArray_STRIDES(%(yy)s)[0] / elemsize;
dtype_%(yy)s* yy_data = (dtype_%(yy)s*) %(yy)s->data; dtype_%(yy)s* yy_data = (dtype_%(yy)s*) PyArray_DATA(%(yy)s);
dtype_%(zz)s* zz_data = (dtype_%(zz)s*) %(zz)s->data; dtype_%(zz)s* zz_data = (dtype_%(zz)s*) PyArray_DATA(%(zz)s);
// gemv expects pointers to the beginning of memory arrays, // gemv expects pointers to the beginning of memory arrays,
// but numpy provides provides a pointer to the first element, // but numpy provides provides a pointer to the first element,
// so when the stride is negative, we need to get the last one. // so when the stride is negative, we need to get the last one.
...@@ -439,10 +439,10 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail): ...@@ -439,10 +439,10 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
// gemv on reversed matrix and vectors // gemv on reversed matrix and vectors
// - if the copy is too long, maybe call vector/vector dot on // - if the copy is too long, maybe call vector/vector dot on
// each row instead // each row instead
if ((%(xx)s->strides[0] < 0) if ((PyArray_STRIDES(%(xx)s)[0] < 0)
|| (%(xx)s->strides[1] < 0) || (PyArray_STRIDES(%(xx)s)[1] < 0)
|| ((%(xx)s->strides[0] != elemsize) || ((PyArray_STRIDES(%(xx)s)[0] != elemsize)
&& (%(xx)s->strides[1] != elemsize))) && (PyArray_STRIDES(%(xx)s)[1] != elemsize)))
{ {
npy_intp dims[2]; npy_intp dims[2];
dims[0] = Nx0; dims[0] = Nx0;
...@@ -454,29 +454,29 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail): ...@@ -454,29 +454,29 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
%(fail)s %(fail)s
Py_XDECREF(%(xx)s); Py_XDECREF(%(xx)s);
%(xx)s = xx_copy; %(xx)s = xx_copy;
Sx0 = (Nx0 > 1) ? (%(xx)s->strides[0] / elemsize) : (Nx1 + 1); Sx0 = (Nx0 > 1) ? (PyArray_STRIDES(%(xx)s)[0] / elemsize) : (Nx1 + 1);
Sx1 = (Nx1 > 1) ? (%(xx)s->strides[1] / elemsize) : (Nx0 + 1); Sx1 = (Nx1 > 1) ? (PyArray_STRIDES(%(xx)s)[1] / elemsize) : (Nx0 + 1);
} }
if (%(xx)s->strides[0] == elemsize) if (PyArray_STRIDES(%(xx)s)[0] == elemsize)
{ {
if (%(xx)s->descr->type_num == PyArray_FLOAT) if (PyArray_DESCR(%(xx)s)->type_num == NPY_FLOAT)
{ {
//fprintf(stderr, "A\\n"); //fprintf(stderr, "A\\n");
float alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0]; float alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
sgemv_(&NOTRANS, &Nx0, &Nx1, sgemv_(&NOTRANS, &Nx0, &Nx1,
&alpha, &alpha,
(float*)(%(xx)s->data), &Sx1, (float*)(PyArray_DATA(%(xx)s)), &Sx1,
(float*)yy_data, &Sy, (float*)yy_data, &Sy,
&fbeta, &fbeta,
(float*)zz_data, &Sz); (float*)zz_data, &Sz);
} }
else if (%(xx)s->descr->type_num == PyArray_DOUBLE) else if (PyArray_DESCR(%(xx)s)->type_num == NPY_DOUBLE)
{ {
double alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0]; double alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
dgemv_(&NOTRANS, &Nx0, &Nx1, dgemv_(&NOTRANS, &Nx0, &Nx1,
&alpha, &alpha,
(double*)(%(xx)s->data), &Sx1, (double*)(PyArray_DATA(%(xx)s)), &Sx1,
(double*)yy_data, &Sy, (double*)yy_data, &Sy,
&dbeta, &dbeta,
(double*)zz_data, &Sz); (double*)zz_data, &Sz);
...@@ -488,28 +488,28 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail): ...@@ -488,28 +488,28 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
%(fail)s %(fail)s
} }
} }
else if (%(xx)s->strides[1] == elemsize) else if (PyArray_STRIDES(%(xx)s)[1] == elemsize)
{ {
if (%(xx)s->descr->type_num == PyArray_FLOAT) if (PyArray_DESCR(%(xx)s)->type_num == NPY_FLOAT)
{ {
//fprintf(stderr, "B %%i %%i %%i %%i\\n", //fprintf(stderr, "B %%i %%i %%i %%i\\n",
// Nz0, Nz1, Sz0, Sz1); // Nz0, Nz1, Sz0, Sz1);
float alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0]; float alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
//fprintf(stderr, "alpha=%%f\\n", alpha); //fprintf(stderr, "alpha=%%f\\n", alpha);
//fprintf(stderr, "sx sy %%i %%i\\n", Sx, Sy); //fprintf(stderr, "sx sy %%i %%i\\n", Sx, Sy);
sgemv_(&TRANS, &Nx1, &Nx0, sgemv_(&TRANS, &Nx1, &Nx0,
&alpha, &alpha,
(float*)(%(xx)s->data), &Sx0, (float*)(PyArray_DATA(%(xx)s)), &Sx0,
(float*)yy_data, &Sy, (float*)yy_data, &Sy,
&fbeta, &fbeta,
(float*)zz_data, &Sz); (float*)zz_data, &Sz);
} }
else if (%(xx)s->descr->type_num == PyArray_DOUBLE) else if (PyArray_DESCR(%(xx)s)->type_num == NPY_DOUBLE)
{ {
double alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0]; double alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
dgemv_(&TRANS, &Nx1, &Nx0, dgemv_(&TRANS, &Nx1, &Nx0,
&alpha, &alpha,
(double*)(%(xx)s->data), &Sx0, (double*)(PyArray_DATA(%(xx)s)), &Sx0,
(double*)yy_data, &Sy, (double*)yy_data, &Sy,
&dbeta, &dbeta,
(double*)zz_data, &Sz); (double*)zz_data, &Sz);
...@@ -534,7 +534,7 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail): ...@@ -534,7 +534,7 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
// the matrix has at least one dim of length 0 // the matrix has at least one dim of length 0
// so we do this loop, which either iterates over 0 elements // so we do this loop, which either iterates over 0 elements
// or else it does the right thing for length-0 x. // or else it does the right thing for length-0 x.
dtype_%(zz)s * zptr = (dtype_%(zz)s*)(%(zz)s->data); dtype_%(zz)s * zptr = (dtype_%(zz)s*)(PyArray_DATA(%(zz)s));
for (int i = 0; i < Nx0; ++i) for (int i = 0; i < Nx0; ++i)
{ {
zptr[i * Sz] *= dbeta; zptr[i * Sz] *= dbeta;
......
...@@ -794,41 +794,41 @@ def ____gemm_code(check_ab, a_init, b_init): ...@@ -794,41 +794,41 @@ def ____gemm_code(check_ab, a_init, b_init):
return """ return """
const char * error_string = NULL; const char * error_string = NULL;
int type_num = _x->descr->type_num; int type_num = PyArray_DESCR(_x)->type_num;
int type_size = _x->descr->elsize; // in bytes int type_size = PyArray_DESCR(_x)->elsize; // in bytes
npy_intp* Nx = _x->dimensions; npy_intp* Nx = PyArray_DIMS(_x);
npy_intp* Ny = _y->dimensions; npy_intp* Ny = PyArray_DIMS(_y);
npy_intp* Nz = _z->dimensions; npy_intp* Nz = PyArray_DIMS(_z);
npy_intp* Sx = _x->strides; npy_intp* Sx = PyArray_STRIDES(_x);
npy_intp* Sy = _y->strides; npy_intp* Sy = PyArray_STRIDES(_y);
npy_intp* Sz = _z->strides; npy_intp* Sz = PyArray_STRIDES(_z);
size_t sx_0, sx_1, sy_0, sy_1, sz_0, sz_1; size_t sx_0, sx_1, sy_0, sy_1, sz_0, sz_1;
int unit = 0; int unit = 0;
if (_x->nd != 2) goto _dot_execute_fallback; if (PyArray_NDIM(_x) != 2) goto _dot_execute_fallback;
if (_y->nd != 2) goto _dot_execute_fallback; if (PyArray_NDIM(_y) != 2) goto _dot_execute_fallback;
if (_z->nd != 2) goto _dot_execute_fallback; if (PyArray_NDIM(_z) != 2) goto _dot_execute_fallback;
%(check_ab)s %(check_ab)s
if ((_x->descr->type_num != PyArray_DOUBLE) if ((PyArray_DESCR(_x)->type_num != NPY_DOUBLE)
&& (_x->descr->type_num != PyArray_FLOAT)) && (PyArray_DESCR(_x)->type_num != NPY_FLOAT))
goto _dot_execute_fallback; goto _dot_execute_fallback;
if ((_y->descr->type_num != PyArray_DOUBLE) if ((PyArray_DESCR(_y)->type_num != NPY_DOUBLE)
&& (_y->descr->type_num != PyArray_FLOAT)) && (PyArray_DESCR(_y)->type_num != NPY_FLOAT))
goto _dot_execute_fallback; goto _dot_execute_fallback;
if ((_y->descr->type_num != PyArray_DOUBLE) if ((PyArray_DESCR(_y)->type_num != NPY_DOUBLE)
&& (_y->descr->type_num != PyArray_FLOAT)) && (PyArray_DESCR(_y)->type_num != NPY_FLOAT))
goto _dot_execute_fallback; goto _dot_execute_fallback;
if ((_x->descr->type_num != _y->descr->type_num) if ((PyArray_DESCR(_x)->type_num != PyArray_DESCR(_y)->type_num)
||(_x->descr->type_num != _z->descr->type_num)) ||(PyArray_DESCR(_x)->type_num != PyArray_DESCR(_z)->type_num))
goto _dot_execute_fallback; goto _dot_execute_fallback;
...@@ -863,7 +863,7 @@ def ____gemm_code(check_ab, a_init, b_init): ...@@ -863,7 +863,7 @@ def ____gemm_code(check_ab, a_init, b_init):
switch (type_num) switch (type_num)
{ {
case PyArray_FLOAT: case NPY_FLOAT:
{ {
#define REAL float #define REAL float
float a = %(a_init)s; float a = %(a_init)s;
...@@ -888,7 +888,7 @@ def ____gemm_code(check_ab, a_init, b_init): ...@@ -888,7 +888,7 @@ def ____gemm_code(check_ab, a_init, b_init):
#undef REAL #undef REAL
} }
break; break;
case PyArray_DOUBLE: case NPY_DOUBLE:
{ {
#define REAL double #define REAL double
double a = %(a_init)s; double a = %(a_init)s;
......
...@@ -270,7 +270,7 @@ class DimShuffle(Op): ...@@ -270,7 +270,7 @@ class DimShuffle(Op):
nd_in = len(self.input_broadcastable) nd_in = len(self.input_broadcastable)
nd_out = len(self.new_order) nd_out = len(self.new_order)
check_input_nd = [('if (%(input)s->nd != ' + str(nd_in) + ')' check_input_nd = [('if (PyArray_NDIM(%(input)s) != ' + str(nd_in) + ')'
'{PyErr_SetString(PyExc_NotImplementedError, "input nd"); %(fail)s;}')] '{PyErr_SetString(PyExc_NotImplementedError, "input nd"); %(fail)s;}')]
clear_output = ['if (%(res)s) {Py_XDECREF(%(res)s);}'] clear_output = ['if (%(res)s) {Py_XDECREF(%(res)s);}']
...@@ -282,13 +282,13 @@ class DimShuffle(Op): ...@@ -282,13 +282,13 @@ class DimShuffle(Op):
'{ PyArrayObject * %(basename)s = %(input)s', 'Py_INCREF((PyObject*)%(basename)s)'] '{ PyArrayObject * %(basename)s = %(input)s', 'Py_INCREF((PyObject*)%(basename)s)']
else: else:
get_base = [('{ PyArrayObject * %(basename)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(input)s, NULL,' get_base = [('{ PyArrayObject * %(basename)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(input)s, NULL,'
'0, 0, NPY_ALIGNED|NPY_ENSURECOPY, NULL)')] '0, 0, NPY_ARRAY_ALIGNED|NPY_ARRAY_ENSURECOPY, NULL)')]
shape_statements = ['npy_intp dimensions[%i]' % nd_out] shape_statements = ['npy_intp dimensions[%i]' % nd_out]
for i, o in enumerate(self.new_order): for i, o in enumerate(self.new_order):
if o != 'x': if o != 'x':
shape_statements += [('dimensions[' + str( shape_statements += [('dimensions[' + str(
i) + '] = %(basename)s->dimensions[' + str(o) + ']')] i) + '] = PyArray_DIMS(%(basename)s)[' + str(o) + ']')]
else: else:
shape_statements += [('dimensions[' + str(i) + '] = 1')] shape_statements += [('dimensions[' + str(i) + '] = 1')]
...@@ -298,7 +298,7 @@ class DimShuffle(Op): ...@@ -298,7 +298,7 @@ class DimShuffle(Op):
for i, o in enumerate(self.new_order): for i, o in enumerate(self.new_order):
if o != 'x': if o != 'x':
strides_statements += [('strides[' + str(i) strides_statements += [('strides[' + str(i)
+ '] = %(basename)s->strides[' + str(o) + ']')] + '] = PyArray_STRIDES(%(basename)s)[' + str(o) + ']')]
else: else:
strides_statements += [('strides[' + str(i) + '] = 0')] strides_statements += [('strides[' + str(i) + '] = 0')]
...@@ -311,7 +311,7 @@ class DimShuffle(Op): ...@@ -311,7 +311,7 @@ class DimShuffle(Op):
str(nd_out) + str(nd_out) +
'-1] == 0) strides[' + '-1] == 0) strides[' +
str(nd_out) + str(nd_out) +
'-1] = %(basename)s->descr->elsize' '-1] = PyArray_DESCR(%(basename)s)->elsize'
) )
for i in xrange(nd_out - 2, -1, -1): for i in xrange(nd_out - 2, -1, -1):
strides_statements.append( strides_statements.append(
...@@ -326,14 +326,20 @@ class DimShuffle(Op): ...@@ -326,14 +326,20 @@ class DimShuffle(Op):
('%(res)s = (PyArrayObject*)PyArray_New(&PyArray_Type, ' ('%(res)s = (PyArrayObject*)PyArray_New(&PyArray_Type, '
'' + str(nd_out) + ', dimensions, ' '' + str(nd_out) + ', dimensions, '
'PyArray_TYPE(%(basename)s), strides, ' 'PyArray_TYPE(%(basename)s), strides, '
'%(basename)s->data, PyArray_ITEMSIZE(%(basename)s), ' 'PyArray_DATA(%(basename)s), PyArray_ITEMSIZE(%(basename)s), '
#borrow only the writable flag from the base #borrow only the writable flag from the base
# the NPY_OWNDATA flag will default to 0. # the NPY_OWNDATA flag will default to 0.
'(NPY_WRITEABLE*PyArray_ISWRITEABLE(%(basename)s)), NULL)'), '(NPY_ARRAY_WRITEABLE*PyArray_ISWRITEABLE(%(basename)s)), NULL)'),
#recalculate flags: CONTIGUOUS, FORTRAN, ALIGNED #recalculate flags: CONTIGUOUS, FORTRAN, ALIGNED
'PyArray_UpdateFlags(%(res)s, NPY_UPDATE_ALL)', 'PyArray_UpdateFlags(%(res)s, NPY_ARRAY_UPDATE_ALL)',
#we are making a view in both inplace and non-inplace cases #we are making a view in both inplace and non-inplace cases
'%(res)s->base = (PyObject*)%(basename)s', """
#if NPY_VERSION <= 0x01000009
PyArray_BASE(%(res)s) = (PyObject*)%(basename)s;
#else
PyArray_SetBaseObject(%(res)s, (PyObject*)%(basename)s);
#endif
"""
'}'] '}']
full_code = statements(check_input_nd full_code = statements(check_input_nd
...@@ -1341,7 +1347,7 @@ class CAReduce(Op): ...@@ -1341,7 +1347,7 @@ class CAReduce(Op):
pattern_ = str(pattern)[1:-1] pattern_ = str(pattern)[1:-1]
decl += """int tosum[]={%(pattern_)s};""" % locals() decl += """int tosum[]={%(pattern_)s};""" % locals()
alloc += """ alloc += """
for(int i=0;i<%(iname)s->nd;i++){ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
if(PyArray_DIMS(%(iname)s)[i]==0 && tosum[i]){ if(PyArray_DIMS(%(iname)s)[i]==0 && tosum[i]){
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"Input of CAReduce{%(scal_name)s} has zero-size on axis %%d",i); "Input of CAReduce{%(scal_name)s} has zero-size on axis %%d",i);
......
...@@ -47,7 +47,7 @@ def make_checks(loop_orders, dtypes, sub): ...@@ -47,7 +47,7 @@ def make_checks(loop_orders, dtypes, sub):
# tensor is as expected. # tensor is as expected.
min_nd = max(nonx) + 1 min_nd = max(nonx) + 1
init += """ init += """
if (%(var)s->nd < %(min_nd)s) { if (PyArray_NDIM(%(var)s) < %(min_nd)s) {
PyErr_SetString(PyExc_ValueError, "Not enough dimensions on input."); PyErr_SetString(PyExc_ValueError, "Not enough dimensions on input.");
%%(fail)s %%(fail)s
} }
...@@ -67,8 +67,8 @@ def make_checks(loop_orders, dtypes, sub): ...@@ -67,8 +67,8 @@ def make_checks(loop_orders, dtypes, sub):
# jump = stride - adjust # jump = stride - adjust
jump = "(%s) - (%s)" % ("%(var)s_stride%(index)s" % locals(), adjust) jump = "(%s) - (%s)" % ("%(var)s_stride%(index)s" % locals(), adjust)
init += """ init += """
%(var)s_n%(index)s = %(var)s->dimensions[%(index)s]; %(var)s_n%(index)s = PyArray_DIMS(%(var)s)[%(index)s];
%(var)s_stride%(index)s = %(var)s->strides[%(index)s] / sizeof(%(dtype)s); %(var)s_stride%(index)s = PyArray_STRIDES(%(var)s)[%(index)s] / sizeof(%(dtype)s);
%(var)s_jump%(index)s_%(j)s = %(jump)s; %(var)s_jump%(index)s_%(j)s = %(jump)s;
//printf("%(var)s_jump%(index)s_%(j)s is:"); //printf("%(var)s_jump%(index)s_%(j)s is:");
//std::cout << %(var)s_jump%(index)s_%(j)s << std::endl; //std::cout << %(var)s_jump%(index)s_%(j)s << std::endl;
...@@ -152,7 +152,7 @@ def make_alloc(loop_orders, dtype, sub): ...@@ -152,7 +152,7 @@ def make_alloc(loop_orders, dtype, sub):
PyArray_Dims new_dims; PyArray_Dims new_dims;
new_dims.len = %(nd)s; new_dims.len = %(nd)s;
new_dims.ptr = dims; new_dims.ptr = dims;
PyObject* success = PyArray_Resize(%(olv)s, &new_dims, 0, PyArray_CORDER); PyObject* success = PyArray_Resize(%(olv)s, &new_dims, 0, NPY_CORDER);
if (!success) { if (!success) {
// If we can't resize the ndarray we have we can allocate a new one. // If we can't resize the ndarray we have we can allocate a new one.
PyErr_Clear(); PyErr_Clear();
...@@ -215,11 +215,11 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub): ...@@ -215,11 +215,11 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub):
for j, index in enumerate(loop_order): for j, index in enumerate(loop_order):
if index != 'x': if index != 'x':
preloops.setdefault(j, "") preloops.setdefault(j, "")
preloops[j] += ("%%(lv%(i)s)s_iter = (%(dtype)s*)(%%(lv%(i)s)s->data);\n" % locals()) % sub preloops[j] += ("%%(lv%(i)s)s_iter = (%(dtype)s*)(PyArray_DATA(%%(lv%(i)s)s));\n" % locals()) % sub
break break
else: # all broadcastable else: # all broadcastable
preloops.setdefault(0, "") preloops.setdefault(0, "")
preloops[0] += ("%%(lv%(i)s)s_iter = (%(dtype)s*)(%%(lv%(i)s)s->data);\n" % locals()) % sub preloops[0] += ("%%(lv%(i)s)s_iter = (%(dtype)s*)(PyArray_DATA(%%(lv%(i)s)s));\n" % locals()) % sub
if len(loop_tasks) == 1: if len(loop_tasks) == 1:
s = preloops.get(0, "") s = preloops.get(0, "")
...@@ -263,7 +263,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub): ...@@ -263,7 +263,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub):
for i, index in enumerate(init_loop_orders[olv_index]): for i, index in enumerate(init_loop_orders[olv_index]):
if index != 'x': if index != 'x':
order_loops += """ order_loops += """
%(ovar)s_loops_it->first = abs(%(ovar)s->strides[%(index)i]); %(ovar)s_loops_it->first = abs(PyArray_STRIDES(%(ovar)s)[%(index)i]);
""" % locals() """ % locals()
else: else:
# Stride is 0 when dimension is broadcastable # Stride is 0 when dimension is broadcastable
...@@ -375,7 +375,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub): ...@@ -375,7 +375,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub):
declare_iter = "" declare_iter = ""
for i, dtype in enumerate(dtypes): for i, dtype in enumerate(dtypes):
var = sub["lv%i" % i] var = sub["lv%i" % i]
declare_iter += "%(var)s_iter = (%(dtype)s*)(%(var)s->data);\n" % locals() declare_iter += "%(var)s_iter = (%(dtype)s*)(PyArray_DATA(%(var)s));\n" % locals()
loop = inner_task loop = inner_task
for i in reversed(range(nnested)): for i in reversed(range(nnested)):
......
...@@ -161,7 +161,6 @@ class Conv3D(theano.Op): ...@@ -161,7 +161,6 @@ class Conv3D(theano.Op):
def c_compile_args(self): def c_compile_args(self):
flags = ldflags(libs=False, flags=True) flags = ldflags(libs=False, flags=True)
flags.append('-Werror')
return flags return flags
def c_lib_dirs(self): def c_lib_dirs(self):
...@@ -183,62 +182,62 @@ class Conv3D(theano.Op): ...@@ -183,62 +182,62 @@ class Conv3D(theano.Op):
//printf("\t\t\t\tConv3D c code\\n"); //printf("\t\t\t\tConv3D c code\\n");
//Check dimensionality of inputs //Check dimensionality of inputs
if (%(W)s->nd != 5) if (PyArray_NDIM(%(W)s) != 5)
{ {
PyErr_Format(PyExc_ValueError, "Conv3D: W must be a 5 dimensional tensor"); PyErr_Format(PyExc_ValueError, "Conv3D: W must be a 5 dimensional tensor");
%(fail)s %(fail)s
} }
if (%(V)s->nd != 5) if (PyArray_NDIM(%(V)s) != 5)
{ {
PyErr_Format(PyExc_ValueError, "Conv3D: V must be a 5 dimensional tensor"); PyErr_Format(PyExc_ValueError, "Conv3D: V must be a 5 dimensional tensor");
%(fail)s %(fail)s
} }
if (%(b)s->nd != 1) if (PyArray_NDIM(%(b)s) != 1)
{ {
PyErr_Format(PyExc_ValueError,"Conv3D: b must be a vector."); PyErr_Format(PyExc_ValueError,"Conv3D: b must be a vector.");
%(fail)s %(fail)s
} }
if (%(d)s->nd != 1) if (PyArray_NDIM(%(d)s) != 1)
{ {
PyErr_Format(PyExc_ValueError,"Conv3D: d must be a vector."); PyErr_Format(PyExc_ValueError,"Conv3D: d must be a vector.");
%(fail)s %(fail)s
} }
if (%(d)s->dimensions[0] != 3) if (PyArray_DIMS(%(d)s)[0] != 3)
{ {
PyErr_Format(PyExc_ValueError,"Conv3D: 3 stride length arguments expected (row, col, time) but %%li were given", (long)%(d)s->dimensions[0]); PyErr_Format(PyExc_ValueError,"Conv3D: 3 stride length arguments expected (row, col, time) but %%li were given", (long)PyArray_DIMS(%(d)s)[0]);
%(fail)s %(fail)s
} }
//Read and check sizes of inputs //Read and check sizes of inputs
{ // exta scope so error handler jumps don't cause errors { // exta scope so error handler jumps don't cause errors
const int batchSize = %(V)s->dimensions[0]; const int batchSize = PyArray_DIMS(%(V)s)[0];
const int outputChannels = %(W)s->dimensions[0]; const int outputChannels = PyArray_DIMS(%(W)s)[0];
const int inputChannels = %(V)s->dimensions[4]; const int inputChannels = PyArray_DIMS(%(V)s)[4];
if (%(W)s->dimensions[4] != inputChannels) if (PyArray_DIMS(%(W)s)[4] != inputChannels)
{ {
PyErr_Format(PyExc_ValueError, "Conv3D: W operates on a %%ld channel image but the image has %%d channels. Overall shape of input: (%%ld,%%ld,%%ld,%%ld,%%ld)", (long)%(W)s->dimensions[4], inputChannels, (long)%(V)s->dimensions[0], (long)%(V)s->dimensions[1], (long)%(V)s->dimensions[2], (long)%(V)s->dimensions[3], (long)%(V)s->dimensions[4]); PyErr_Format(PyExc_ValueError, "Conv3D: W operates on a %%ld channel image but the image has %%d channels. Overall shape of input: (%%ld,%%ld,%%ld,%%ld,%%ld)", (long)PyArray_DIMS(%(W)s)[4], inputChannels, (long)PyArray_DIMS(%(V)s)[0], (long)PyArray_DIMS(%(V)s)[1], (long)PyArray_DIMS(%(V)s)[2], (long)PyArray_DIMS(%(V)s)[3], (long)PyArray_DIMS(%(V)s)[4]);
%(fail)s %(fail)s
} }
if (%(b)s->dimensions[0] != outputChannels) if (PyArray_DIMS(%(b)s)[0] != outputChannels)
{ {
PyErr_Format(PyExc_ValueError, "Conv3D: b adds to a(n) %%ld channel output image but the output has %%d channels", (long)%(b)s->dimensions[0], outputChannels); PyErr_Format(PyExc_ValueError, "Conv3D: b adds to a(n) %%ld channel output image but the output has %%d channels", (long)PyArray_DIMS(%(b)s)[0], outputChannels);
%(fail)s %(fail)s
} }
{ //extra scope so error handler jumps don't cause errors { //extra scope so error handler jumps don't cause errors
const int filterHeight = %(W)s->dimensions[1]; const int filterHeight = PyArray_DIMS(%(W)s)[1];
const int filterWidth = %(W)s->dimensions[2]; const int filterWidth = PyArray_DIMS(%(W)s)[2];
const int filterDur = %(W)s->dimensions[3]; const int filterDur = PyArray_DIMS(%(W)s)[3];
const int vidHeight = %(V)s->dimensions[1]; const int vidHeight = PyArray_DIMS(%(V)s)[1];
const int vidWidth = %(V)s->dimensions[2]; const int vidWidth = PyArray_DIMS(%(V)s)[2];
const int vidDur = %(V)s->dimensions[3];\ const int vidDur = PyArray_DIMS(%(V)s)[3];\
if (vidHeight < filterHeight) if (vidHeight < filterHeight)
{ {
...@@ -291,13 +290,13 @@ class Conv3D(theano.Op): ...@@ -291,13 +290,13 @@ class Conv3D(theano.Op):
if(!(%(H)s) || %(H)s->dimensions[0]!=dims[0] || if(!(%(H)s) || PyArray_DIMS(%(H)s)[0]!=dims[0] ||
%(H)s->dimensions[1]!=dims[1] || PyArray_DIMS(%(H)s)[1]!=dims[1] ||
%(H)s->dimensions[2]!=dims[2] || PyArray_DIMS(%(H)s)[2]!=dims[2] ||
%(H)s->dimensions[3]!=dims[3] || PyArray_DIMS(%(H)s)[3]!=dims[3] ||
%(H)s->dimensions[4]!=dims[4]){ PyArray_DIMS(%(H)s)[4]!=dims[4]){
Py_XDECREF(%(H)s); Py_XDECREF(%(H)s);
%(H)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, %(V)s->descr->type_num); %(H)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, PyArray_DESCR(%(V)s)->type_num);
if (!(%(H)s)) { if (!(%(H)s)) {
PyErr_Format(PyExc_MemoryError,"Conv3D: Could not allocate output."); PyErr_Format(PyExc_MemoryError,"Conv3D: Could not allocate output.");
%(fail)s %(fail)s
...@@ -306,20 +305,20 @@ class Conv3D(theano.Op): ...@@ -306,20 +305,20 @@ class Conv3D(theano.Op):
{ // extra scope so fail works { // extra scope so fail works
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) ) #define ELEM_AT(x, i) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i) )
const int ws0 = %(W)s->strides[0]; const int ws0 = PyArray_STRIDES(%(W)s)[0];
const int ws1 = %(W)s->strides[1]; const int ws1 = PyArray_STRIDES(%(W)s)[1];
const int ws2 = %(W)s->strides[2]; const int ws2 = PyArray_STRIDES(%(W)s)[2];
const int vs1 = %(V)s->strides[1]; const int vs1 = PyArray_STRIDES(%(V)s)[1];
const int ws4 = %(W)s->strides[4]; const int ws4 = PyArray_STRIDES(%(W)s)[4];
const int vs4 = %(V)s->strides[4]; const int vs4 = PyArray_STRIDES(%(V)s)[4];
const int ws3 = %(W)s->strides[3]; const int ws3 = PyArray_STRIDES(%(W)s)[3];
const int vs3 = %(V)s->strides[3]; const int vs3 = PyArray_STRIDES(%(V)s)[3];
const int vs2 = %(V)s->strides[2]; const int vs2 = PyArray_STRIDES(%(V)s)[2];
const int bs = %(b)s->strides[0]; const int bs = PyArray_STRIDES(%(b)s)[0];
const int hs4 = %(H)s->strides[4]; const int hs4 = PyArray_STRIDES(%(H)s)[4];
...@@ -425,20 +424,20 @@ class Conv3D(theano.Op): ...@@ -425,20 +424,20 @@ class Conv3D(theano.Op):
Wpos = Wposl + ws2; Wpos = Wposl + ws2;
Vpos = Vposl + vs2; Vpos = Vposl + vs2;
} //close l } //close l
Wpos = Wposk + %(W)s->strides[1]; Wpos = Wposk + PyArray_STRIDES(%(W)s)[1];
Vpos = Vposk + %(V)s->strides[1]; Vpos = Vposk + PyArray_STRIDES(%(V)s)[1];
} //close k } //close k
Hpos = Hpost + %(H)s->strides[3]; Hpos = Hpost + PyArray_STRIDES(%(H)s)[3];
Vpos = Vpost + vs3 * dt; Vpos = Vpost + vs3 * dt;
} //close t } //close t
Hpos = Hposc + %(H)s->strides[2]; Hpos = Hposc + PyArray_STRIDES(%(H)s)[2];
Vpos = Vposc + vs2 * dc; Vpos = Vposc + vs2 * dc;
} //close c } //close c
Hpos = Hposr + %(H)s->strides[1]; Hpos = Hposr + PyArray_STRIDES(%(H)s)[1];
Vpos = Vposr + %(V)s->strides[1] * dr; Vpos = Vposr + PyArray_STRIDES(%(V)s)[1] * dr;
} //closes r } //closes r
Hpos = Hposi + %(H)s->strides[0]; Hpos = Hposi + PyArray_STRIDES(%(H)s)[0];
Vpos = Vposi + %(V)s->strides[0]; Vpos = Vposi + PyArray_STRIDES(%(V)s)[0];
} //closes i } //closes i
...@@ -516,8 +515,8 @@ class Conv3D(theano.Op): ...@@ -516,8 +515,8 @@ class Conv3D(theano.Op):
Wpos = Wposl + ws2; Wpos = Wposl + ws2;
Vpos = Vposl + vs2; Vpos = Vposl + vs2;
} //close l } //close l
Wpos = Wposk + %(W)s->strides[1]; Wpos = Wposk + PyArray_STRIDES(%(W)s)[1];
Vpos = Vposk + %(V)s->strides[1]; Vpos = Vposk + PyArray_STRIDES(%(V)s)[1];
} //close k } //close k
...@@ -528,17 +527,17 @@ class Conv3D(theano.Op): ...@@ -528,17 +527,17 @@ class Conv3D(theano.Op):
//std::cout << "incremented Wpos by " << ws0 << std::endl; //std::cout << "incremented Wpos by " << ws0 << std::endl;
//std::cout << "incremented Hpos by " << hs4 << std::endl; //std::cout << "incremented Hpos by " << hs4 << std::endl;
} //close j } //close j
Hpos = Hpost + %(H)s->strides[3]; Hpos = Hpost + PyArray_STRIDES(%(H)s)[3];
Vpos = Vpost + vs3 * dt; Vpos = Vpost + vs3 * dt;
} //close t } //close t
Hpos = Hposc + %(H)s->strides[2]; Hpos = Hposc + PyArray_STRIDES(%(H)s)[2];
Vpos = Vposc + vs2 * dc; Vpos = Vposc + vs2 * dc;
} //close c } //close c
Hpos = Hposr + %(H)s->strides[1]; Hpos = Hposr + PyArray_STRIDES(%(H)s)[1];
Vpos = Vposr + %(V)s->strides[1] * dr; Vpos = Vposr + PyArray_STRIDES(%(V)s)[1] * dr;
} //closes r } //closes r
Hpos = Hposi + %(H)s->strides[0]; Hpos = Hposi + PyArray_STRIDES(%(H)s)[0];
Vpos = Vposi + %(V)s->strides[0]; Vpos = Vposi + PyArray_STRIDES(%(V)s)[0];
} //closes i } //closes i
} //closes general case code } //closes general case code
}}}}}}} //extra scope so error handler jumps don't cross declarations }}}}}}} //extra scope so error handler jumps don't cross declarations
......
...@@ -85,10 +85,6 @@ class ConvGrad3D(theano.Op): ...@@ -85,10 +85,6 @@ class ConvGrad3D(theano.Op):
output_storage[0][0] = dCdW output_storage[0][0] = dCdW
def c_compile_args(self):
flags = ['-Werror']
return flags
def c_code(self, node, nodename, inputs, outputs, sub): def c_code(self, node, nodename, inputs, outputs, sub):
V, d, WShape, dCdH = inputs V, d, WShape, dCdH = inputs
fail = sub['fail'] fail = sub['fail']
...@@ -101,40 +97,40 @@ class ConvGrad3D(theano.Op): ...@@ -101,40 +97,40 @@ class ConvGrad3D(theano.Op):
//printf("\t\t\t\tConvGradW3D c code\\n"); //printf("\t\t\t\tConvGradW3D c code\\n");
//Check dimensionality of inputs //Check dimensionality of inputs
if (%(dCdH)s->nd != 5) if (PyArray_NDIM(%(dCdH)s) != 5)
{ {
PyErr_Format(PyExc_ValueError, "ConvGrad3D: dCdH must be a 5 dimensional tensor"); PyErr_Format(PyExc_ValueError, "ConvGrad3D: dCdH must be a 5 dimensional tensor");
%(fail)s %(fail)s
} }
if (%(V)s->nd != 5) if (PyArray_NDIM(%(V)s) != 5)
{ {
PyErr_Format(PyExc_ValueError, "ConvGrad3D: V must be a 5 dimensional tensor"); PyErr_Format(PyExc_ValueError, "ConvGrad3D: V must be a 5 dimensional tensor");
%(fail)s %(fail)s
} }
if (%(WShape)s->nd != 1) if (PyArray_NDIM(%(WShape)s) != 1)
{ {
PyErr_Format(PyExc_ValueError,"ConvGrad3D: WShape must be a vector."); PyErr_Format(PyExc_ValueError,"ConvGrad3D: WShape must be a vector.");
%(fail)s %(fail)s
} }
if (%(d)s->nd != 1) if (PyArray_NDIM(%(d)s) != 1)
{ {
PyErr_Format(PyExc_ValueError,"ConvGrad3D: d must be a vector."); PyErr_Format(PyExc_ValueError,"ConvGrad3D: d must be a vector.");
%(fail)s %(fail)s
} }
if (%(d)s->dimensions[0] != 3) if (PyArray_DIMS(%(d)s)[0] != 3)
{ {
PyErr_Format(PyExc_ValueError,"ConvGrad3D: 3 stride length arguments expected (row, col, time) but %%li were given", (long)%(d)s->dimensions[0]); PyErr_Format(PyExc_ValueError,"ConvGrad3D: 3 stride length arguments expected (row, col, time) but %%li were given", (long)PyArray_DIMS(%(d)s)[0]);
%(fail)s %(fail)s
} }
{ //extra scope so that fail will not jump over declarations { //extra scope so that fail will not jump over declarations
//Read and check sizes of inputs //Read and check sizes of inputs
const int batchSize = %(V)s->dimensions[0]; const int batchSize = PyArray_DIMS(%(V)s)[0];
if (%(WShape)s->dimensions[0] != 5) if (PyArray_DIMS(%(WShape)s)[0] != 5)
{ {
PyErr_Format(PyExc_ValueError,"ConvGrad3D: WShape must specify a 5D shape"); PyErr_Format(PyExc_ValueError,"ConvGrad3D: WShape must specify a 5D shape");
%(fail)s %(fail)s
...@@ -146,9 +142,9 @@ class ConvGrad3D(theano.Op): ...@@ -146,9 +142,9 @@ class ConvGrad3D(theano.Op):
} }
{ //extra scope so that fail will not jump over declarations { //extra scope so that fail will not jump over declarations
dtype_%(WShape)s * WShape = (dtype_%(WShape)s *) %(WShape)s->data; dtype_%(WShape)s * WShape = (dtype_%(WShape)s *) PyArray_DATA(%(WShape)s);
const int outputChannels = WShape[0]; const int outputChannels = WShape[0];
const int inputChannels = %(V)s->dimensions[4]; const int inputChannels = PyArray_DIMS(%(V)s)[4];
if (WShape[4] != inputChannels) if (WShape[4] != inputChannels)
{ {
PyErr_Format(PyExc_ValueError, "ConvGrad3D: W operates on a %%i channel image but the image has %%i channels",(int) WShape[1],inputChannels); PyErr_Format(PyExc_ValueError, "ConvGrad3D: W operates on a %%i channel image but the image has %%i channels",(int) WShape[1],inputChannels);
...@@ -159,9 +155,9 @@ class ConvGrad3D(theano.Op): ...@@ -159,9 +155,9 @@ class ConvGrad3D(theano.Op):
const int filterHeight = WShape[1]; const int filterHeight = WShape[1];
const int filterWidth = WShape[2]; const int filterWidth = WShape[2];
const int filterDur = WShape[3]; const int filterDur = WShape[3];
const int vidHeight = %(V)s->dimensions[1]; const int vidHeight = PyArray_DIMS(%(V)s)[1];
const int vidWidth = %(V)s->dimensions[2]; const int vidWidth = PyArray_DIMS(%(V)s)[2];
const int vidDur = %(V)s->dimensions[3]; const int vidDur = PyArray_DIMS(%(V)s)[3];
if (vidHeight < filterHeight) if (vidHeight < filterHeight)
{ {
PyErr_Format(PyExc_ValueError, "ConvGrad3D: W has a height of %%i but V is only %%i pixels tall", filterHeight, vidHeight); PyErr_Format(PyExc_ValueError, "ConvGrad3D: W has a height of %%i but V is only %%i pixels tall", filterHeight, vidHeight);
...@@ -197,13 +193,13 @@ class ConvGrad3D(theano.Op): ...@@ -197,13 +193,13 @@ class ConvGrad3D(theano.Op):
if (%(dCdH)s->dimensions[0] != batchSize || if (PyArray_DIMS(%(dCdH)s)[0] != batchSize ||
%(dCdH)s->dimensions[4] != outputChannels || PyArray_DIMS(%(dCdH)s)[4] != outputChannels ||
%(dCdH)s->dimensions[1] != outputHeight || PyArray_DIMS(%(dCdH)s)[1] != outputHeight ||
%(dCdH)s->dimensions[2] != outputWidth || PyArray_DIMS(%(dCdH)s)[2] != outputWidth ||
%(dCdH)s->dimensions[3] != outputDur) PyArray_DIMS(%(dCdH)s)[3] != outputDur)
{ {
PyErr_Format(PyExc_ValueError, "dCdH is the wrong size, expected (%%i,%%i,%%i,%%i,%%i), got (%%li,%%li,%%li,%%li,%%li)", batchSize, outputHeight, outputWidth, outputDur, outputChannels, (long)%(dCdH)s->dimensions[0], (long)%(dCdH)s->dimensions[1], (long)%(dCdH)s->dimensions[2], (long)%(dCdH)s->dimensions[3], (long)%(dCdH)s->dimensions[4]); PyErr_Format(PyExc_ValueError, "dCdH is the wrong size, expected (%%i,%%i,%%i,%%i,%%i), got (%%li,%%li,%%li,%%li,%%li)", batchSize, outputHeight, outputWidth, outputDur, outputChannels, (long)PyArray_DIMS(%(dCdH)s)[0], (long)PyArray_DIMS(%(dCdH)s)[1], (long)PyArray_DIMS(%(dCdH)s)[2], (long)PyArray_DIMS(%(dCdH)s)[3], (long)PyArray_DIMS(%(dCdH)s)[4]);
%(fail)s %(fail)s
} }
{ // extra scope for fail { // extra scope for fail
...@@ -215,13 +211,13 @@ class ConvGrad3D(theano.Op): ...@@ -215,13 +211,13 @@ class ConvGrad3D(theano.Op):
dims[2] = filterWidth; dims[2] = filterWidth;
dims[3] = filterDur; dims[3] = filterDur;
if(!(%(dCdW)s) || %(dCdW)s->dimensions[0]!=dims[0] || if(!(%(dCdW)s) || PyArray_DIMS(%(dCdW)s)[0]!=dims[0] ||
%(dCdW)s->dimensions[1]!=dims[1] || PyArray_DIMS(%(dCdW)s)[1]!=dims[1] ||
%(dCdW)s->dimensions[2]!=dims[2] || PyArray_DIMS(%(dCdW)s)[2]!=dims[2] ||
%(dCdW)s->dimensions[3]!=dims[3] || PyArray_DIMS(%(dCdW)s)[3]!=dims[3] ||
%(dCdW)s->dimensions[4]!=dims[4] ){ PyArray_DIMS(%(dCdW)s)[4]!=dims[4] ){
Py_XDECREF(%(dCdW)s); Py_XDECREF(%(dCdW)s);
%(dCdW)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, %(V)s->descr->type_num); %(dCdW)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, PyArray_DESCR(%(V)s)->type_num);
if (!(%(dCdW)s)) { if (!(%(dCdW)s)) {
PyErr_Format(PyExc_MemoryError,"ConvGrad3D: Could not allocate dCdW"); PyErr_Format(PyExc_MemoryError,"ConvGrad3D: Could not allocate dCdW");
...@@ -230,12 +226,12 @@ class ConvGrad3D(theano.Op): ...@@ -230,12 +226,12 @@ class ConvGrad3D(theano.Op):
} }
{ //extra scope so fail works { //extra scope so fail works
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( x->data + (i)*x->strides[0]+(j)*x->strides[1]+(k)*x->strides[2]+(l)*x->strides[3]+(m)*x->strides[4] ) #define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( PyArray_DATA(x) + (i)*PyArray_STRIDES(x)[0]+(j)*PyArray_STRIDES(x)[1]+(k)*PyArray_STRIDES(x)[2]+(l)*PyArray_STRIDES(x)[3]+(m)*PyArray_STRIDES(x)[4] )
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) ) #define ELEM_AT(x, i) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i) )
const int dhs3 = %(dCdH)s->strides[3]; const int dhs3 = PyArray_STRIDES(%(dCdH)s)[3];
const int dtvs3 = dt * %(V)s->strides[3]; const int dtvs3 = dt * PyArray_STRIDES(%(V)s)[3];
// Compute dCdW // Compute dCdW
//TODO-- see if this can be made faster by using ELEM_AT instead of ELEM5 //TODO-- see if this can be made faster by using ELEM_AT instead of ELEM5
...@@ -253,8 +249,8 @@ class ConvGrad3D(theano.Op): ...@@ -253,8 +249,8 @@ class ConvGrad3D(theano.Op):
for (int i = 0; i < batchSize; i++) { for (int i = 0; i < batchSize; i++) {
for (int p = 0; p < outputHeight; p++) { for (int p = 0; p < outputHeight; p++) {
for (int q = 0; q < outputWidth; q++) { for (int q = 0; q < outputWidth; q++) {
int Hpos = i * %(dCdH)s->strides[0] + j * %(dCdH)s->strides[4] + p * %(dCdH)s->strides[1] + q * %(dCdH)s->strides[2] ; int Hpos = i * PyArray_STRIDES(%(dCdH)s)[0] + j * PyArray_STRIDES(%(dCdH)s)[4] + p * PyArray_STRIDES(%(dCdH)s)[1] + q * PyArray_STRIDES(%(dCdH)s)[2] ;
int Vpos = i * %(V)s->strides[0] + z * %(V)s->strides[4] + (dr * p+k) * %(V)s->strides[1] + (dc*q+l) * %(V)s->strides[2] + m * %(V)s->strides[3]; int Vpos = i * PyArray_STRIDES(%(V)s)[0] + z * PyArray_STRIDES(%(V)s)[4] + (dr * p+k) * PyArray_STRIDES(%(V)s)[1] + (dc*q+l) * PyArray_STRIDES(%(V)s)[2] + m * PyArray_STRIDES(%(V)s)[3];
for (int r = 0; r < outputDur; r++) { for (int r = 0; r < outputDur; r++) {
writePos += ELEM5(%(dCdH)s,i,p,q,r,j) * ELEM5(%(V)s,i,dr*p+k,dc*q+l,dt*r+m,z); writePos += ELEM5(%(dCdH)s,i,p,q,r,j) * ELEM5(%(V)s,i,dr*p+k,dc*q+l,dt*r+m,z);
......
...@@ -35,10 +35,6 @@ class ConvTransp3D(theano.Op): ...@@ -35,10 +35,6 @@ class ConvTransp3D(theano.Op):
return theano.Apply(self, inputs=[W_,b_,d_,H_, RShape_], outputs = [ T.TensorType(H_.dtype, (False,False,False,False,False))() ] ) return theano.Apply(self, inputs=[W_,b_,d_,H_, RShape_], outputs = [ T.TensorType(H_.dtype, (False,False,False,False,False))() ] )
def c_compile_args(self):
flags = ['-Werror']
return flags
def infer_shape(self, node, input_shapes): def infer_shape(self, node, input_shapes):
W, b, d, H, RShape = node.inputs W, b, d, H, RShape = node.inputs
W_shape, b_shape, d_shape, H_shape, RShape_shape = input_shapes W_shape, b_shape, d_shape, H_shape, RShape_shape = input_shapes
...@@ -103,34 +99,36 @@ class ConvTransp3D(theano.Op): ...@@ -103,34 +99,36 @@ class ConvTransp3D(theano.Op):
//printf("\t\t\t\tConvTransp3D c code\\n"); //printf("\t\t\t\tConvTransp3D c code\\n");
//Check dimensionality of inputs //Check dimensionality of inputs
if (%(H)s->nd != 5) if (PyArray_NDIM(%(H)s) != 5)
{ {
PyErr_Format(PyExc_ValueError, "H must be a 5-D tensor but it is %%i-D",%(H)s->nd); PyErr_Format(PyExc_ValueError,
"H must be a 5-D tensor but it is %%i-D",
PyArray_NDIM(%(H)s));
%(fail)s %(fail)s
} }
if (%(W)s->nd != 5) if (PyArray_NDIM(%(W)s) != 5)
{ {
PyErr_Format(PyExc_ValueError, "ConvTransp3D: W must be a 5-D tensor"); PyErr_Format(PyExc_ValueError, "ConvTransp3D: W must be a 5-D tensor");
%(fail)s %(fail)s
} }
if (%(b)s->nd != 1) if (PyArray_NDIM(%(b)s) != 1)
{ {
PyErr_Format(PyExc_ValueError, "ConvTransp3D: b must be a vector"); PyErr_Format(PyExc_ValueError, "ConvTransp3D: b must be a vector");
%(fail)s %(fail)s
} }
if (%(d)s->nd != 1) if (PyArray_NDIM(%(d)s) != 1)
{ {
PyErr_Format(PyExc_ValueError, "ConvTransp3D: d must be a vector"); PyErr_Format(PyExc_ValueError, "ConvTransp3D: d must be a vector");
%(fail)s %(fail)s
} }
//Read and check stride arguments //Read and check stride arguments
if (%(d)s->dimensions[0] != 3) if (PyArray_DIMS(%(d)s)[0] != 3)
{ {
PyErr_Format(PyExc_ValueError, "ConvTransp3D: 3 stride length arguments expected (for row, col, and time) but %%li were given", (long)%(d)s->dimensions[0] ); PyErr_Format(PyExc_ValueError, "ConvTransp3D: 3 stride length arguments expected (for row, col, and time) but %%li were given", (long)PyArray_DIMS(%(d)s)[0] );
%(fail)s %(fail)s
} }
...@@ -149,33 +147,33 @@ class ConvTransp3D(theano.Op): ...@@ -149,33 +147,33 @@ class ConvTransp3D(theano.Op):
//Read and check sizes of inputs //Read and check sizes of inputs
{ // for fail 2 { // for fail 2
const int batchSize = %(H)s->dimensions[0]; const int batchSize = PyArray_DIMS(%(H)s)[0];
const int outputChannels = %(W)s->dimensions[0]; const int outputChannels = PyArray_DIMS(%(W)s)[0];
if (%(H)s->dimensions[4] != outputChannels) if (PyArray_DIMS(%(H)s)[4] != outputChannels)
{ {
PyErr_Format(PyExc_ValueError, "W produces a %%i channel image but the image has %%li channels. W.shape: (%%li, %%li, %%li, %%li, %%li) H.shape: (%%li, %%li, %%li, %%li, %%li)", outputChannels, (long)%(H)s->dimensions[4], (long)%(W)s->dimensions[0], (long)%(W)s->dimensions[1], (long)%(W)s->dimensions[2], (long)%(W)s->dimensions[3], (long)%(W)s->dimensions[4], (long)%(H)s->dimensions[0], (long)%(H)s->dimensions[1], (long)%(H)s->dimensions[2], (long)%(H)s->dimensions[3], (long)%(H)s->dimensions[4]); PyErr_Format(PyExc_ValueError, "W produces a %%i channel image but the image has %%li channels. W.shape: (%%li, %%li, %%li, %%li, %%li) H.shape: (%%li, %%li, %%li, %%li, %%li)", outputChannels, (long)PyArray_DIMS(%(H)s)[4], (long)PyArray_DIMS(%(W)s)[0], (long)PyArray_DIMS(%(W)s)[1], (long)PyArray_DIMS(%(W)s)[2], (long)PyArray_DIMS(%(W)s)[3], (long)PyArray_DIMS(%(W)s)[4], (long)PyArray_DIMS(%(H)s)[0], (long)PyArray_DIMS(%(H)s)[1], (long)PyArray_DIMS(%(H)s)[2], (long)PyArray_DIMS(%(H)s)[3], (long)PyArray_DIMS(%(H)s)[4]);
%(fail)s %(fail)s
} }
{ // for fail 3 { // for fail 3
const int inputChannels = %(W)s->dimensions[4]; const int inputChannels = PyArray_DIMS(%(W)s)[4];
if (%(b)s->dimensions[0] != inputChannels) if (PyArray_DIMS(%(b)s)[0] != inputChannels)
{ {
PyErr_Format(PyExc_ValueError, "ConvTransp3D: b operates on a %%li channel image but the image has %%i channels", (long)%(b)s->dimensions[0], inputChannels ); PyErr_Format(PyExc_ValueError, "ConvTransp3D: b operates on a %%li channel image but the image has %%i channels", (long)PyArray_DIMS(%(b)s)[0], inputChannels );
%(fail)s %(fail)s
} }
{ // for fail 4 { // for fail 4
const int filterHeight = %(W)s->dimensions[1]; const int filterHeight = PyArray_DIMS(%(W)s)[1];
const int filterWidth = %(W)s->dimensions[2]; const int filterWidth = PyArray_DIMS(%(W)s)[2];
const int filterDur = %(W)s->dimensions[3]; const int filterDur = PyArray_DIMS(%(W)s)[3];
const int outputHeight = %(H)s->dimensions[1]; const int outputHeight = PyArray_DIMS(%(H)s)[1];
const int outputWidth = %(H)s->dimensions[2]; const int outputWidth = PyArray_DIMS(%(H)s)[2];
const int outputDur = %(H)s->dimensions[3]; const int outputDur = PyArray_DIMS(%(H)s)[3];
int videoHeight = (outputHeight-1) * dr + filterHeight; int videoHeight = (outputHeight-1) * dr + filterHeight;
int videoWidth = (outputWidth-1) * dc + filterWidth; int videoWidth = (outputWidth-1) * dc + filterWidth;
...@@ -183,13 +181,13 @@ class ConvTransp3D(theano.Op): ...@@ -183,13 +181,13 @@ class ConvTransp3D(theano.Op):
if (%(RShape)s) if (%(RShape)s)
{ {
if (%(RShape)s->nd != 1) if (PyArray_NDIM(%(RShape)s) != 1)
{ {
PyErr_Format(PyExc_ValueError, "ConvTransp3D: RShape must be a vector"); PyErr_Format(PyExc_ValueError, "ConvTransp3D: RShape must be a vector");
%(fail)s %(fail)s
} }
if (%(RShape)s->dimensions[0] != 3) if (PyArray_DIMS(%(RShape)s)[0] != 3)
{ {
PyErr_Format(PyExc_ValueError, "RShape must specify a 3D shape ( [height,width,duration] )"); PyErr_Format(PyExc_ValueError, "RShape must specify a 3D shape ( [height,width,duration] )");
%(fail)s %(fail)s
...@@ -223,14 +221,14 @@ class ConvTransp3D(theano.Op): ...@@ -223,14 +221,14 @@ class ConvTransp3D(theano.Op):
dims[2] = videoWidth; dims[2] = videoWidth;
dims[3] = videoDur; dims[3] = videoDur;
if(!(%(R)s) || %(R)s->dimensions[0]!=dims[0] || if(!(%(R)s) || PyArray_DIMS(%(R)s)[0]!=dims[0] ||
%(R)s->dimensions[1]!=dims[1] || PyArray_DIMS(%(R)s)[1]!=dims[1] ||
%(R)s->dimensions[2]!=dims[2] || PyArray_DIMS(%(R)s)[2]!=dims[2] ||
%(R)s->dimensions[3]!=dims[3] || PyArray_DIMS(%(R)s)[3]!=dims[3] ||
%(R)s->dimensions[4]!=dims[4]) PyArray_DIMS(%(R)s)[4]!=dims[4])
{ {
Py_XDECREF(%(R)s); Py_XDECREF(%(R)s);
%(R)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, %(H)s->descr->type_num); %(R)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, PyArray_DESCR(%(H)s)->type_num);
if (!(%(R)s)) { if (!(%(R)s)) {
PyErr_Format(PyExc_MemoryError, "ConvTransp3D: could not allocate R"); PyErr_Format(PyExc_MemoryError, "ConvTransp3D: could not allocate R");
%(fail)s %(fail)s
...@@ -239,17 +237,17 @@ class ConvTransp3D(theano.Op): ...@@ -239,17 +237,17 @@ class ConvTransp3D(theano.Op):
{ // for fail 6 { // for fail 6
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( x->data + (i)*x->strides[0]+(j)*x->strides[1]+(k)*x->strides[2]+(l)*x->strides[3]+(m)*x->strides[4] ) #define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( PyArray_DATA(x) + (i)*PyArray_STRIDES(x)[0]+(j)*PyArray_STRIDES(x)[1]+(k)*PyArray_STRIDES(x)[2]+(l)*PyArray_STRIDES(x)[3]+(m)*PyArray_STRIDES(x)[4] )
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) ) #define ELEM_AT(x, i) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i) )
dtype_%(b)s * b = (dtype_%(b)s *) %(b)s->data; dtype_%(b)s * b = (dtype_%(b)s *) PyArray_DATA(%(b)s);
int rs4 = %(R)s->strides[4]; int rs4 = PyArray_STRIDES(%(R)s)[4];
int ws0 = %(W)s->strides[0]; int ws0 = PyArray_STRIDES(%(W)s)[0];
int ws4 = %(W)s->strides[4]; int ws4 = PyArray_STRIDES(%(W)s)[4];
int hs4 = %(H)s->strides[4]; int hs4 = PyArray_STRIDES(%(H)s)[4];
// Compute R // Compute R
// R[i,r,c,t,j] = b_j + sum_{rc,rk | d \circ rc + rk = r} sum_{cc,ck | ...} sum_{tc,tk | ...} sum_k W[k, rk, ck, tk,j] * H[i,rc,cc,tc,k] // R[i,r,c,t,j] = b_j + sum_{rc,rk | d \circ rc + rk = r} sum_{cc,ck | ...} sum_{tc,tk | ...} sum_k W[k, rk, ck, tk,j] * H[i,rc,cc,tc,k]
...@@ -262,7 +260,7 @@ class ConvTransp3D(theano.Op): ...@@ -262,7 +260,7 @@ class ConvTransp3D(theano.Op):
for (int t = 0; t < videoDur; t++) { for (int t = 0; t < videoDur; t++) {
const int ftc = (int)std::max(0.0f, ceilf(float(t-filterDur +1) /float(dt))); const int ftc = (int)std::max(0.0f, ceilf(float(t-filterDur +1) /float(dt)));
long long Rpost = i * %(R)s->strides[0] + r * %(R)s->strides[1] + c * %(R)s->strides[2] + t * %(R)s->strides[3]; long long Rpost = i * PyArray_STRIDES(%(R)s)[0] + r * PyArray_STRIDES(%(R)s)[1] + c * PyArray_STRIDES(%(R)s)[2] + t * PyArray_STRIDES(%(R)s)[3];
long long Rpos = Rpost; long long Rpos = Rpost;
for (int j = 0; j < inputChannels; j++) for (int j = 0; j < inputChannels; j++)
...@@ -286,8 +284,8 @@ class ConvTransp3D(theano.Op): ...@@ -286,8 +284,8 @@ class ConvTransp3D(theano.Op):
const int tk = t - tc * dt; const int tk = t - tc * dt;
if (tk < 0) break; if (tk < 0) break;
int Wpos = rk * %(W)s->strides[1] + ck * %(W)s->strides[2] + tk * %(W)s->strides[3]; int Wpos = rk * PyArray_STRIDES(%(W)s)[1] + ck * PyArray_STRIDES(%(W)s)[2] + tk * PyArray_STRIDES(%(W)s)[3];
int Hpostc = i * %(H)s->strides[0] + rc * %(H)s->strides[1] + cc * %(H)s->strides[2] + tc * %(H)s->strides[3]; int Hpostc = i * PyArray_STRIDES(%(H)s)[0] + rc * PyArray_STRIDES(%(H)s)[1] + cc * PyArray_STRIDES(%(H)s)[2] + tc * PyArray_STRIDES(%(H)s)[3];
Rpos = Rpost; Rpos = Rpost;
for (int j = 0; j < inputChannels; j++) for (int j = 0; j < inputChannels; j++)
{ {
......
...@@ -970,7 +970,7 @@ class ConvOp(OpenMPOp): ...@@ -970,7 +970,7 @@ class ConvOp(OpenMPOp):
def c_support_code(self): def c_support_code(self):
return """ return """
#define STRIDES(arr) ((arr)->strides) #define STRIDES(arr) (PyArray_STRIDES(arr))
#define FULL 2 #define FULL 2
#define SAME 1 #define SAME 1
#define VALID 0 #define VALID 0
...@@ -1159,15 +1159,15 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){ ...@@ -1159,15 +1159,15 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
""" % (locals()) """ % (locals())
else: else:
d["self_bsize"] = "%(img2d)s->dimensions[0]" % d d["self_bsize"] = "PyArray_DIMS(%(img2d)s)[0]" % d
d["self_nkern"] = "%(filtersflipped)s->dimensions[0]" % d d["self_nkern"] = "PyArray_DIMS(%(filtersflipped)s)[0]" % d
d["self_outshp0"] = "-1" d["self_outshp0"] = "-1"
d["self_outshp1"] = "-1" d["self_outshp1"] = "-1"
d["self_imshp0"] = "%(img2d)s->dimensions[1]" % d d["self_imshp0"] = "PyArray_DIMS(%(img2d)s)[1]" % d
d["self_imshp1"] = "%(img2d)s->dimensions[2]" % d d["self_imshp1"] = "PyArray_DIMS(%(img2d)s)[2]" % d
d["self_imshp2"] = "%(img2d)s->dimensions[3]" % d d["self_imshp2"] = "PyArray_DIMS(%(img2d)s)[3]" % d
d["self_kshp0"] = "%(filtersflipped)s->dimensions[2]" % d d["self_kshp0"] = "PyArray_DIMS(%(filtersflipped)s)[2]" % d
d["self_kshp1"] = "%(filtersflipped)s->dimensions[3]" % d d["self_kshp1"] = "PyArray_DIMS(%(filtersflipped)s)[3]" % d
d["affectation"] = "+=" d["affectation"] = "+="
d["all_shape"] = "0" d["all_shape"] = "0"
d["dim_zz_const"] = "" d["dim_zz_const"] = ""
...@@ -1242,7 +1242,7 @@ _conv_op_code_a = """ ...@@ -1242,7 +1242,7 @@ _conv_op_code_a = """
const int mode=%(mode)s; const int mode=%(mode)s;
int typenum=0, typenum_f=0; int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL; PyArrayObject *ain1=NULL, *ain2=NULL;
PyArrayObject *filtersflipped_arr=NULL, *img2d_arr=NULL; PyArrayObject *filtersflipped_arr=NULL, *img2d_arr=NULL, *z_arr=NULL;
const %(type)s fill_value = 0; const %(type)s fill_value = 0;
int type_im=PyArray_TYPE(%(img2d)s); int type_im=PyArray_TYPE(%(img2d)s);
...@@ -1266,35 +1266,35 @@ kerns_shape.len=4; ...@@ -1266,35 +1266,35 @@ kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL; PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
if(%(img2d)s->nd==2){ if(PyArray_NDIM(%(img2d)s)==2){
img2d_dim[3]=%(img2d)s->dimensions[1]; img2d_dim[3]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[2]=%(img2d)s->dimensions[0]; img2d_dim[2]=PyArray_DIMS(%(img2d)s)[0];
}else if(%(img2d)s->nd==3){ }else if(PyArray_NDIM(%(img2d)s)==3){
img2d_dim[3]=%(img2d)s->dimensions[2]; img2d_dim[3]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[2]=%(img2d)s->dimensions[1]; img2d_dim[2]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=%(img2d)s->dimensions[0]; img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else if(%(img2d)s->nd==4){ }else if(PyArray_NDIM(%(img2d)s)==4){
img2d_dim[3]=%(img2d)s->dimensions[3]; img2d_dim[3]=PyArray_DIMS(%(img2d)s)[3];
img2d_dim[2]=%(img2d)s->dimensions[2]; img2d_dim[2]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[1]=%(img2d)s->dimensions[1]; img2d_dim[1]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=%(img2d)s->dimensions[0]; img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else { }else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape"); PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
%(fail)s; %(fail)s;
} }
if(%(filtersflipped)s->nd==3){ if(PyArray_NDIM(%(filtersflipped)s)==3){
kerns_dim[3]=%(filtersflipped)s->dimensions[2]; kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[2]=%(filtersflipped)s->dimensions[1]; kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0]; kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else if(%(filtersflipped)s->nd==4){ }else if(PyArray_NDIM(%(filtersflipped)s)==4){
kerns_dim[3]=%(filtersflipped)s->dimensions[3]; kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[3];
kerns_dim[2]=%(filtersflipped)s->dimensions[2]; kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[1]=%(filtersflipped)s->dimensions[1]; kerns_dim[1]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0]; kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else{ }else{
std::stringstream temp; std::stringstream temp;
temp << "nddim="<<%(filtersflipped)s->nd; temp << "nddim="<<PyArray_NDIM(%(filtersflipped)s);
std::string param = temp.str(); std::string param = temp.str();
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
("kernel don't have a good shape. " + param).c_str()); ("kernel don't have a good shape. " + param).c_str());
...@@ -1303,33 +1303,33 @@ if(%(filtersflipped)s->nd==3){ ...@@ -1303,33 +1303,33 @@ if(%(filtersflipped)s->nd==3){
%(assert_size)s %(assert_size)s
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER); img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d; img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s)) if ((PyArray_STRIDES(img2d_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){ || (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d)); contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d); Py_DECREF(img2d);
img2d = contig; img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){ if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous"); PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s; %(fail)s;
} }
} }
img2d_arr = (PyArrayObject*)img2d; img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, PyArray_CORDER); filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, NPY_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped; filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s)) if ((PyArray_STRIDES(filtersflipped_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){ || (PyArray_STRIDES(filtersflipped_arr)[2] != PyArray_DIMS(filtersflipped_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped)); contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped); Py_DECREF(filtersflipped);
filtersflipped = contig; filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){ filtersflipped_arr = (PyArrayObject*)filtersflipped;
if (!PyArray_ISCONTIGUOUS(filtersflipped_arr)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous"); PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)s; %(fail)s;
} }
} }
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){ if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
...@@ -1348,10 +1348,10 @@ if (!img2d) %(fail)s; ...@@ -1348,10 +1348,10 @@ if (!img2d) %(fail)s;
if (!filtersflipped) %(fail)s; if (!filtersflipped) %(fail)s;
if ((!%(z)s) if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4 || *PyArray_DIMS(%(z)s)!=4
||(%(z)s->dimensions[0] != %(self_bsize)s) ||(PyArray_DIMS(%(z)s)[0] != %(self_bsize)s)
||(%(z)s->dimensions[1] != %(self_nkern)s) ||(PyArray_DIMS(%(z)s)[1] != %(self_nkern)s)
||(%(z)s->dimensions[2] != dim_zz[0]) ||(PyArray_DIMS(%(z)s)[2] != dim_zz[0])
|| (%(z)s->dimensions[3] != dim_zz[1]) || (PyArray_DIMS(%(z)s)[3] != dim_zz[1])
) )
{ {
{Py_XDECREF(%(z)s);} {Py_XDECREF(%(z)s);}
...@@ -1364,36 +1364,37 @@ if ((!%(z)s) ...@@ -1364,36 +1364,37 @@ if ((!%(z)s)
}else{ }else{
//PyArray_FILLWBYTE((PyObject*)%(z)s,0); //PyArray_FILLWBYTE((PyObject*)%(z)s,0);
} }
z_arr = (PyArrayObject*) %(z)s;
int Os[2]; int Os[2];
Os[0]=%(self_outshp0)s; Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s; Os[1]=%(self_outshp1)s;
//assertions //assertions
if (%(z)s->strides[0] != %(z)s->dimensions[1] * if (PyArray_STRIDES(%(z)s)[0] != PyArray_DIMS(%(z)s)[1] *
%(z)s->dimensions[2] * PyArray_DIMS(%(z)s)[2] *
%(z)s->dimensions[3] * PyArray_DIMS(%(z)s)[3] *
(npy_intp)sizeof(%(type)s)) (npy_intp)sizeof(%(type)s))
%(fail)s; %(fail)s;
if (%(z)s->strides[1] != %(z)s->dimensions[2] * if (PyArray_STRIDES(%(z)s)[1] != PyArray_DIMS(%(z)s)[2] *
%(z)s->dimensions[3] * PyArray_DIMS(%(z)s)[3] *
(npy_intp)sizeof(%(type)s)) (npy_intp)sizeof(%(type)s))
%(fail)s; %(fail)s;
if (%(z)s->strides[2] != %(z)s->dimensions[3] * (npy_intp)sizeof(%(type)s)) if (PyArray_STRIDES(%(z)s)[2] != PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s))
%(fail)s; %(fail)s;
if (%(z)s->strides[3] != (npy_intp)sizeof(%(type)s)) if (PyArray_STRIDES(%(z)s)[3] != (npy_intp)sizeof(%(type)s))
%(fail)s; %(fail)s;
for(int b=0;b< %(self_bsize)s;b++){ for(int b=0;b< %(self_bsize)s;b++){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){ for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){
%(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(%(z)s,b,n_kern)); %(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(z_arr,b,n_kern));
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0; for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0;
for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){ for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d,b,stack_size)); const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d_arr,b,stack_size));
const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped,n_kern,stack_size)); const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped_arr,n_kern,stack_size));
for (int iter_m=0; iter_m < Os[0]; iter_m++) { for (int iter_m=0; iter_m < Os[0]; iter_m++) {
...@@ -1514,7 +1515,7 @@ Py_XDECREF(filtersflipped); ...@@ -1514,7 +1515,7 @@ Py_XDECREF(filtersflipped);
_conv_op_code_valid_gemm = """ _conv_op_code_valid_gemm = """
int typenum=0, typenum_f=0; int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *img2d_arr=NULL; PyArrayObject *ain1=NULL, *ain2=NULL, *img2d_arr=NULL, *z_arr=NULL;
const int NKERN = %(self_nkern)s; const int NKERN = %(self_nkern)s;
int type_im=PyArray_TYPE(%(img2d)s); int type_im=PyArray_TYPE(%(img2d)s);
...@@ -1536,35 +1537,35 @@ kerns_shape.ptr=kerns_dim; ...@@ -1536,35 +1537,35 @@ kerns_shape.ptr=kerns_dim;
kerns_shape.len=4; kerns_shape.len=4;
PyObject *img2d=NULL, *contig; PyObject *img2d=NULL, *contig;
if(%(img2d)s->nd==2){ if(PyArray_NDIM(%(img2d)s)==2){
img2d_dim[3]=%(img2d)s->dimensions[1]; img2d_dim[3]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[2]=%(img2d)s->dimensions[0]; img2d_dim[2]=PyArray_DIMS(%(img2d)s)[0];
}else if(%(img2d)s->nd==3){ }else if(PyArray_NDIM(%(img2d)s)==3){
img2d_dim[3]=%(img2d)s->dimensions[2]; img2d_dim[3]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[2]=%(img2d)s->dimensions[1]; img2d_dim[2]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=%(img2d)s->dimensions[0]; img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else if(%(img2d)s->nd==4){ }else if(PyArray_NDIM(%(img2d)s)==4){
img2d_dim[3]=%(img2d)s->dimensions[3]; img2d_dim[3]=PyArray_DIMS(%(img2d)s)[3];
img2d_dim[2]=%(img2d)s->dimensions[2]; img2d_dim[2]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[1]=%(img2d)s->dimensions[1]; img2d_dim[1]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=%(img2d)s->dimensions[0]; img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else { }else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape"); PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
%(fail)s; %(fail)s;
} }
if(%(filtersflipped)s->nd==3){ if(PyArray_NDIM(%(filtersflipped)s)==3){
kerns_dim[3]=%(filtersflipped)s->dimensions[2]; kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[2]=%(filtersflipped)s->dimensions[1]; kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0]; kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else if(%(filtersflipped)s->nd==4){ }else if(PyArray_NDIM(%(filtersflipped)s)==4){
kerns_dim[3]=%(filtersflipped)s->dimensions[3]; kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[3];
kerns_dim[2]=%(filtersflipped)s->dimensions[2]; kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[1]=%(filtersflipped)s->dimensions[1]; kerns_dim[1]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0]; kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else{ }else{
std::stringstream temp; std::stringstream temp;
temp << "nddim="<<%(filtersflipped)s->nd; temp << "nddim="<<PyArray_NDIM(%(filtersflipped)s);
std::string param = temp.str(); std::string param = temp.str();
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
("kernel don't have a good shape. " + param).c_str()); ("kernel don't have a good shape. " + param).c_str());
...@@ -1576,14 +1577,14 @@ if (NKERN != kerns_dim[0]) ...@@ -1576,14 +1577,14 @@ if (NKERN != kerns_dim[0])
%(fail)s; %(fail)s;
} }
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER); img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d; img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s)) if ((PyArray_STRIDES(img2d_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){ || (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d)); contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d); Py_DECREF(img2d);
img2d = contig; img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){ if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous"); PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s; %(fail)s;
} }
...@@ -1601,10 +1602,10 @@ if (!img2d) { ...@@ -1601,10 +1602,10 @@ if (!img2d) {
} }
if ((!%(z)s) if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4 || *PyArray_DIMS(%(z)s)!=4
||(%(z)s->dimensions[0] != %(self_bsize)s) ||(PyArray_DIMS(%(z)s)[0] != %(self_bsize)s)
||(%(z)s->dimensions[1] != %(self_nkern)s) ||(PyArray_DIMS(%(z)s)[1] != %(self_nkern)s)
||(%(z)s->dimensions[2] != dim_zz[0]) ||(PyArray_DIMS(%(z)s)[2] != dim_zz[0])
|| (%(z)s->dimensions[3] != dim_zz[1]) || (PyArray_DIMS(%(z)s)[3] != dim_zz[1])
) )
{ {
{Py_XDECREF(%(z)s);} {Py_XDECREF(%(z)s);}
...@@ -1617,6 +1618,7 @@ if ((!%(z)s) ...@@ -1617,6 +1618,7 @@ if ((!%(z)s)
}else{ }else{
PyArray_FILLWBYTE((PyObject*)%(z)s,0); PyArray_FILLWBYTE((PyObject*)%(z)s,0);
} }
z_arr = (PyArrayObject*) %(z)s;
%(assert_size)s %(assert_size)s
...@@ -1637,7 +1639,7 @@ for(int i=0;i < kerns_dim[0];++i){ ...@@ -1637,7 +1639,7 @@ for(int i=0;i < kerns_dim[0];++i){
for(int j=0;j < kerns_dim[1];++j){ for(int j=0;j < kerns_dim[1];++j){
for(int k=0;k < kerns_dim[2];++k){ for(int k=0;k < kerns_dim[2];++k){
for(int l=0;l < kerns_dim[3];++l){ for(int l=0;l < kerns_dim[3];++l){
%(type)s * ff = ((%(filtersflipped)s)->nd == 3) %(type)s * ff = ((PyArray_NDIM(%(filtersflipped)s)) == 3)
? (%(type)s *)PyArray_GETPTR3(%(filtersflipped)s, i, kerns_dim[2]-1-k, kerns_dim[3]-1-l) ? (%(type)s *)PyArray_GETPTR3(%(filtersflipped)s, i, kerns_dim[2]-1-k, kerns_dim[3]-1-l)
: (%(type)s *)PyArray_GETPTR4(%(filtersflipped)s, i, j, kerns_dim[2]-1-k, kerns_dim[3]-1-l); : (%(type)s *)PyArray_GETPTR4(%(filtersflipped)s, i, j, kerns_dim[2]-1-k, kerns_dim[3]-1-l);
myfilters[i * (kerns_dim[1]*kerns_dim[2]*kerns_dim[3]) myfilters[i * (kerns_dim[1]*kerns_dim[2]*kerns_dim[3])
...@@ -1672,7 +1674,7 @@ for(int b=0;b< %(self_bsize)s;b++){ ...@@ -1672,7 +1674,7 @@ for(int b=0;b< %(self_bsize)s;b++){
int imgview_stride = dim_im[1]; int imgview_stride = dim_im[1];
int filter_rows_stride =kerns_dim[1]*kerns_dim[2]*kerns_dim[3]; int filter_rows_stride =kerns_dim[1]*kerns_dim[2]*kerns_dim[3];
//remember, Fortran wants a column-major interpretation //remember, Fortran wants a column-major interpretation
assert(img2d->strides[3] == (npy_intp)sizeof(%(type)s)); assert(PyArray_STRIDES(img2d)[3] == (npy_intp)sizeof(%(type)s));
if (0){ if (0){
std::cerr << "b " << b << " img_col " << img_col << " filterrow " << filter_row << " stackidx " <<stackidx << "\\n"; std::cerr << "b " << b << " img_col " << img_col << " filterrow " << filter_row << " stackidx " <<stackidx << "\\n";
...@@ -1717,10 +1719,10 @@ for(int b=0;b< %(self_bsize)s;b++){ ...@@ -1717,10 +1719,10 @@ for(int b=0;b< %(self_bsize)s;b++){
%(type)s * z_p = (%(type)s *)PyArray_GETPTR4(%(z)s, b, kernel_idx, img_row, img_col); %(type)s * z_p = (%(type)s *)PyArray_GETPTR4(%(z)s, b, kernel_idx, img_row, img_col);
if (0) if (0)
{ {
if (b >= %(z)s->dimensions[0]) %(fail)s; if (b >= PyArray_DIMS(%(z)s)[0]) %(fail)s;
if (kernel_idx >= %(z)s->dimensions[1]) %(fail)s; if (kernel_idx >= PyArray_DIMS(%(z)s)[1]) %(fail)s;
if (img_row >= %(z)s->dimensions[2]) %(fail)s; if (img_row >= PyArray_DIMS(%(z)s)[2]) %(fail)s;
if (img_col >= %(z)s->dimensions[3]) %(fail)s; if (img_col >= PyArray_DIMS(%(z)s)[3]) %(fail)s;
} }
z_p[0] += kbuf[img_row * kbufstride + kernel_idx]; z_p[0] += kbuf[img_row * kbufstride + kernel_idx];
} }
...@@ -1766,7 +1768,7 @@ def gen_conv_code_unroll_batch_kern(d, unroll_bsize=1, unroll_ksize=1): ...@@ -1766,7 +1768,7 @@ def gen_conv_code_unroll_batch_kern(d, unroll_bsize=1, unroll_ksize=1):
ret = """ ret = """
const int mode=%(mode)s; const int mode=%(mode)s;
int typenum=0, typenum_f=0; int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL; PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL, *z_arr=NULL;;
const %(type)s fill_value = 0; const %(type)s fill_value = 0;
int type_im=PyArray_TYPE(%(img2d)s); int type_im=PyArray_TYPE(%(img2d)s);
...@@ -1788,36 +1790,36 @@ kerns_shape.ptr=kerns_dim; ...@@ -1788,36 +1790,36 @@ kerns_shape.ptr=kerns_dim;
kerns_shape.len=4; kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL; PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
if(%(img2d)s->nd==2){ if(PyArray_NDIM(%(img2d)s)==2){
img2d_dim[3]=%(img2d)s->dimensions[1]; img2d_dim[3]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[2]=%(img2d)s->dimensions[0]; img2d_dim[2]=PyArray_DIMS(%(img2d)s)[0];
}else if(%(img2d)s->nd==3){ }else if(PyArray_NDIM(%(img2d)s)==3){
img2d_dim[3]=%(img2d)s->dimensions[2]; img2d_dim[3]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[2]=%(img2d)s->dimensions[1]; img2d_dim[2]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=%(img2d)s->dimensions[0]; img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else if(%(img2d)s->nd==4){ }else if(PyArray_NDIM(%(img2d)s)==4){
img2d_dim[3]=%(img2d)s->dimensions[3]; img2d_dim[3]=PyArray_DIMS(%(img2d)s)[3];
img2d_dim[2]=%(img2d)s->dimensions[2]; img2d_dim[2]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[1]=%(img2d)s->dimensions[1]; img2d_dim[1]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=%(img2d)s->dimensions[0]; img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else { }else {
std::stringstream temp; std::stringstream temp;
temp << "nddim="<<%(img2d)s->nd; temp << "nddim="<<PyArray_NDIM(%(img2d)s);
std::string param = temp.str(); std::string param = temp.str();
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
("img don't have a good shape. " + param).c_str()); ("img don't have a good shape. " + param).c_str());
%(fail)s; %(fail)s;
} }
if(%(filtersflipped)s->nd==3){ if(PyArray_NDIM(%(filtersflipped)s)==3){
kerns_dim[3]=%(filtersflipped)s->dimensions[2]; kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[2]=%(filtersflipped)s->dimensions[1]; kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0]; kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else if(%(filtersflipped)s->nd==4){ }else if(PyArray_NDIM(%(filtersflipped)s)==4){
kerns_dim[3]=%(filtersflipped)s->dimensions[3]; kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[3];
kerns_dim[2]=%(filtersflipped)s->dimensions[2]; kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[1]=%(filtersflipped)s->dimensions[1]; kerns_dim[1]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0]; kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else{ }else{
PyErr_SetString(PyExc_ValueError, "kernel don't have a good shape"); PyErr_SetString(PyExc_ValueError, "kernel don't have a good shape");
%(fail)s; %(fail)s;
...@@ -1825,33 +1827,33 @@ if(%(filtersflipped)s->nd==3){ ...@@ -1825,33 +1827,33 @@ if(%(filtersflipped)s->nd==3){
%(assert_size)s %(assert_size)s
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER); img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d; img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s)) if ((PyArray_STRIDES(img2d_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){ || (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d)); contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d); Py_DECREF(img2d);
img2d = contig; img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){ if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous"); PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s; %(fail)s;
} }
} }
img2d_arr = (PyArrayObject*)img2d; img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, PyArray_CORDER); filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, NPY_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped; filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s)) if ((PyArray_STRIDES(filtersflipped_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){ || (PyArray_STRIDES(filtersflipped_arr)[2] != PyArray_DIMS(filtersflipped_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped)); contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped); Py_DECREF(filtersflipped);
filtersflipped = contig; filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){ filtersflipped_arr = (PyArrayObject*)filtersflipped;
if (!PyArray_ISCONTIGUOUS(filtersflipped_arr)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous"); PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)s; %(fail)s;
} }
} }
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){ if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s; PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s;
...@@ -1865,10 +1867,10 @@ if (!img2d) %(fail)s; ...@@ -1865,10 +1867,10 @@ if (!img2d) %(fail)s;
if (!filtersflipped) %(fail)s; if (!filtersflipped) %(fail)s;
if ((!%(z)s) if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4 || *PyArray_DIMS(%(z)s)!=4
||(%(z)s->dimensions[0] != %(self_bsize)s) ||(PyArray_DIMS(%(z)s)[0] != %(self_bsize)s)
||(%(z)s->dimensions[1] != %(self_nkern)s) ||(PyArray_DIMS(%(z)s)[1] != %(self_nkern)s)
||(%(z)s->dimensions[2] != dim_zz[0]) ||(PyArray_DIMS(%(z)s)[2] != dim_zz[0])
|| (%(z)s->dimensions[3] != dim_zz[1]) || (PyArray_DIMS(%(z)s)[3] != dim_zz[1])
) )
{ {
{Py_XDECREF(%(z)s);} {Py_XDECREF(%(z)s);}
...@@ -1881,28 +1883,29 @@ if ((!%(z)s) ...@@ -1881,28 +1883,29 @@ if ((!%(z)s)
}else{ }else{
//PyArray_FILLWBYTE((PyObject*)%(z)s,0); //PyArray_FILLWBYTE((PyObject*)%(z)s,0);
} }
z_arr = (PyArrayObject*) %(z)s;
int Os[2]; int Os[2];
Os[0]=%(self_outshp0)s; Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s; Os[1]=%(self_outshp1)s;
//assertions //assertions
if (%(z)s->strides[0] != %(z)s->dimensions[1] *%(z)s->dimensions[2] *%(z)s->dimensions[3] * (npy_intp)sizeof(%(type)s)) %(fail)s; if (PyArray_STRIDES(%(z)s)[0] != PyArray_DIMS(%(z)s)[1] *PyArray_DIMS(%(z)s)[2] *PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[1] != %(z)s->dimensions[2] * %(z)s->dimensions[3] * (npy_intp)sizeof(%(type)s)) %(fail)s; if (PyArray_STRIDES(%(z)s)[1] != PyArray_DIMS(%(z)s)[2] * PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[2] != %(z)s->dimensions[3] * (npy_intp)sizeof(%(type)s)) %(fail)s; if (PyArray_STRIDES(%(z)s)[2] != PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[3] != (npy_intp)sizeof(%(type)s)) %(fail)s; if (PyArray_STRIDES(%(z)s)[3] != (npy_intp)sizeof(%(type)s)) %(fail)s;
for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){ for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){
""" % d """ % d
ret += my_dup2("%(type)s * __restrict__ out%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(%(z)s,b+%(unroll_biter)s,n_kern+%(unroll_kiter)s));") ret += my_dup2("%(type)s * __restrict__ out%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(z_arr,b+%(unroll_biter)s,n_kern+%(unroll_kiter)s));")
ret += my_dup("for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out%(unroll_iter)s[i] = 0;", unroll_bsize * unroll_ksize) ret += my_dup("for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out%(unroll_iter)s[i] = 0;", unroll_bsize * unroll_ksize)
ret += """ ret += """
for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){ for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
""" % d """ % d
ret += my_dup("const %(type)s * __restrict__ in%(unroll_iter)d=(%(type)s *)(PyArray_GETPTR2(img2d,b+%(unroll_iter)s,stack_size));", unroll_bsize) ret += my_dup("const %(type)s * __restrict__ in%(unroll_iter)d=(%(type)s *)(PyArray_GETPTR2(img2d_arr,b+%(unroll_iter)s,stack_size));", unroll_bsize)
ret += my_dup("const %(type)s * __restrict__ hvals%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(filtersflipped,n_kern+%(unroll_iter)s,stack_size));", unroll_ksize) ret += my_dup("const %(type)s * __restrict__ hvals%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(filtersflipped_arr,n_kern+%(unroll_iter)s,stack_size));", unroll_ksize)
ret += """ ret += """
int new_m; int new_m;
...@@ -1999,7 +2002,7 @@ Py_XDECREF(filtersflipped); ...@@ -1999,7 +2002,7 @@ Py_XDECREF(filtersflipped);
_conv_op_code_unroll_patch = """ _conv_op_code_unroll_patch = """
const int mode=%(mode)s; const int mode=%(mode)s;
int typenum=0, typenum_f=0; int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL; PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL, *z_arr=NULL;
const %(type)s fill_value = 0;//only value of 0 are currently tested and correctly implemented const %(type)s fill_value = 0;//only value of 0 are currently tested and correctly implemented
int type_im=PyArray_TYPE(%(img2d)s); int type_im=PyArray_TYPE(%(img2d)s);
...@@ -2025,68 +2028,68 @@ kerns_shape.ptr=kerns_dim; ...@@ -2025,68 +2028,68 @@ kerns_shape.ptr=kerns_dim;
kerns_shape.len=4; kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL; PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
if(%(img2d)s->nd==2){ if(PyArray_NDIM(%(img2d)s)==2){
img2d_dim[3]=%(img2d)s->dimensions[1]; img2d_dim[3]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[2]=%(img2d)s->dimensions[0]; img2d_dim[2]=PyArray_DIMS(%(img2d)s)[0];
}else if(%(img2d)s->nd==3){ }else if(PyArray_NDIM(%(img2d)s)==3){
img2d_dim[3]=%(img2d)s->dimensions[2]; img2d_dim[3]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[2]=%(img2d)s->dimensions[1]; img2d_dim[2]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=%(img2d)s->dimensions[0]; img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else if(%(img2d)s->nd==4){ }else if(PyArray_NDIM(%(img2d)s)==4){
img2d_dim[3]=%(img2d)s->dimensions[3]; img2d_dim[3]=PyArray_DIMS(%(img2d)s)[3];
img2d_dim[2]=%(img2d)s->dimensions[2]; img2d_dim[2]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[1]=%(img2d)s->dimensions[1]; img2d_dim[1]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=%(img2d)s->dimensions[0]; img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else { }else {
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"image don't have a good number of dimensions %%d. ", %(filtersflipped)s->nd); "image don't have a good number of dimensions %%d. ", PyArray_NDIM(%(filtersflipped)s));
%(fail)s; %(fail)s;
} }
if(%(filtersflipped)s->nd==3){ if(PyArray_NDIM(%(filtersflipped)s)==3){
kerns_dim[3]=%(filtersflipped)s->dimensions[2]; kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[2]=%(filtersflipped)s->dimensions[1]; kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0]; kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else if(%(filtersflipped)s->nd==4){ }else if(PyArray_NDIM(%(filtersflipped)s)==4){
kerns_dim[3]=%(filtersflipped)s->dimensions[3]; kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[3];
kerns_dim[2]=%(filtersflipped)s->dimensions[2]; kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[1]=%(filtersflipped)s->dimensions[1]; kerns_dim[1]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0]; kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else{ }else{
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"kernel don't have a good number of dimensions %%d. ", %(filtersflipped)s->nd); "kernel don't have a good number of dimensions %%d. ", PyArray_NDIM(%(filtersflipped)s));
%(fail)s; %(fail)s;
} }
%(assert_size)s %(assert_size)s
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER); img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d; img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(%(type)s)) if ((PyArray_STRIDES(img2d_arr)[3] != sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(%(type)s))){ || (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d)); contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d); Py_DECREF(img2d);
img2d = contig; img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){ if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous"); PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s; %(fail)s;
} }
} }
img2d_arr = (PyArrayObject*)img2d; img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, PyArray_CORDER); filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, NPY_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped; filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != sizeof(%(type)s)) if ((PyArray_STRIDES(filtersflipped_arr)[3] != sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*sizeof(%(type)s))){ || (PyArray_STRIDES(filtersflipped_arr)[2] != PyArray_DIMS(filtersflipped_arr)[3]*sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped)); contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped); Py_DECREF(filtersflipped);
filtersflipped = contig; filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){ filtersflipped_arr = (PyArrayObject*)filtersflipped;
if (!PyArray_ISCONTIGUOUS(filtersflipped_arr)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous"); PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)s; %(fail)s;
} }
} }
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){ if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s; PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s;
...@@ -2107,10 +2110,10 @@ if (!img2d) %(fail)s; ...@@ -2107,10 +2110,10 @@ if (!img2d) %(fail)s;
if (!filtersflipped) %(fail)s; if (!filtersflipped) %(fail)s;
if ((!%(z)s) if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4 || *PyArray_DIMS(%(z)s)!=4
||(%(z)s->dimensions[0] != %(self_bsize)s) ||(PyArray_DIMS(%(z)s)[0] != %(self_bsize)s)
||(%(z)s->dimensions[1] != %(self_nkern)s) ||(PyArray_DIMS(%(z)s)[1] != %(self_nkern)s)
||(%(z)s->dimensions[2] != dim_zz[0]) ||(PyArray_DIMS(%(z)s)[2] != dim_zz[0])
|| (%(z)s->dimensions[3] != dim_zz[1]) || (PyArray_DIMS(%(z)s)[3] != dim_zz[1])
) )
{ {
if (%(z)s) Py_DECREF(%(z)s); if (%(z)s) Py_DECREF(%(z)s);
...@@ -2124,12 +2127,13 @@ if ((!%(z)s) ...@@ -2124,12 +2127,13 @@ if ((!%(z)s)
}else{ }else{
//PyArray_FILLWBYTE((PyObject*)%(z)s,0); //PyArray_FILLWBYTE((PyObject*)%(z)s,0);
} }
z_arr = (PyArrayObject*) %(z)s;
//assertions //assertions
if (%(z)s->strides[0] != %(z)s->dimensions[1] *%(z)s->dimensions[2] *%(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s; if (PyArray_STRIDES(%(z)s)[0] != PyArray_DIMS(%(z)s)[1] *PyArray_DIMS(%(z)s)[2] *PyArray_DIMS(%(z)s)[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[1] != %(z)s->dimensions[2] * %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s; if (PyArray_STRIDES(%(z)s)[1] != PyArray_DIMS(%(z)s)[2] * PyArray_DIMS(%(z)s)[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[2] != %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s; if (PyArray_STRIDES(%(z)s)[2] != PyArray_DIMS(%(z)s)[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[3] != sizeof(%(type)s)) %(fail)s; if (PyArray_STRIDES(%(z)s)[3] != sizeof(%(type)s)) %(fail)s;
//The if on the number of loop make a speed up for small array. //The if on the number of loop make a speed up for small array.
//with g++ 4.5.1. The compiler should be smart enough to do this himself! //with g++ 4.5.1. The compiler should be smart enough to do this himself!
...@@ -2144,13 +2148,13 @@ for(int batch_kern_idx=0; ...@@ -2144,13 +2148,13 @@ for(int batch_kern_idx=0;
int b = batch_kern_idx / %(self_nkern)s; int b = batch_kern_idx / %(self_nkern)s;
int n_kern = batch_kern_idx %% %(self_nkern)s; int n_kern = batch_kern_idx %% %(self_nkern)s;
%(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(%(z)s,b,n_kern)); %(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(z_arr,b,n_kern));
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0; for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0;
for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){ for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d,b,stack_size)); const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d_arr,b,stack_size));
const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped,n_kern,stack_size)); const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped_arr,n_kern,stack_size));
int new_m; int new_m;
......
...@@ -105,41 +105,41 @@ class SoftmaxWithBias(gof.Op): ...@@ -105,41 +105,41 @@ class SoftmaxWithBias(gof.Op):
#TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1] #TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
init_decl = """ init_decl = """
npy_intp* Nx = %(x)s->dimensions; npy_intp* Nx = PyArray_DIMS(%(x)s);
if (%(x)s->nd != 2) if (PyArray_NDIM(%(x)s) != 2)
{ {
PyErr_SetString(PyExc_ValueError, "a not 2d tensor"); PyErr_SetString(PyExc_ValueError, "a not 2d tensor");
%(fail)s; %(fail)s;
} }
if (%(b)s->nd != 1) if (PyArray_NDIM(%(b)s) != 1)
{ {
PyErr_SetString(PyExc_ValueError, "b not 1d tensor"); PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
%(fail)s; %(fail)s;
} }
if ((%(x)s->descr->type_num != PyArray_DOUBLE) && if ((PyArray_DESCR(%(x)s)->type_num != NPY_DOUBLE) &&
(%(x)s->descr->type_num != PyArray_FLOAT)) (PyArray_DESCR(%(x)s)->type_num != NPY_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, "a not float"); PyErr_SetString(PyExc_TypeError, "a not float");
%(fail)s; %(fail)s;
} }
if ((%(b)s->descr->type_num != PyArray_DOUBLE) && if ((PyArray_DESCR(%(b)s)->type_num != NPY_DOUBLE) &&
(%(b)s->descr->type_num != PyArray_FLOAT)) (PyArray_DESCR(%(b)s)->type_num != NPY_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, "b not float"); PyErr_SetString(PyExc_TypeError, "b not float");
%(fail)s; %(fail)s;
} }
if ((%(x)s->dimensions[1] != %(b)s->dimensions[0])) if ((PyArray_DIMS(%(x)s)[1] != PyArray_DIMS(%(b)s)[0]))
{ {
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"number of columns in x (%%ld) does not match length of b (%%ld)", "number of columns in x (%%ld) does not match length of b (%%ld)",
(long int)%(x)s->dimensions[1], (long int)%(b)s->dimensions[0]); (long int)PyArray_DIMS(%(x)s)[1], (long int)PyArray_DIMS(%(b)s)[0]);
%(fail)s; %(fail)s;
} }
if ((NULL == %(sm)s) if ((NULL == %(sm)s)
|| (%(sm)s->dimensions[0] != %(x)s->dimensions[0]) || (PyArray_DIMS(%(sm)s)[0] != PyArray_DIMS(%(x)s)[0])
|| (%(sm)s->dimensions[1] != %(x)s->dimensions[1])) || (PyArray_DIMS(%(sm)s)[1] != PyArray_DIMS(%(x)s)[1]))
{ {
if (NULL != %(sm)s) Py_XDECREF(%(sm)s); if (NULL != %(sm)s) Py_XDECREF(%(sm)s);
%(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s), %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s),
...@@ -159,15 +159,15 @@ class SoftmaxWithBias(gof.Op): ...@@ -159,15 +159,15 @@ class SoftmaxWithBias(gof.Op):
double sum = 0.0; double sum = 0.0;
bool discount_max = false; bool discount_max = false;
const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(%(x)s->data + %(x)s->strides[0] * i); const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_DATA(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i);
const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(%(b)s->data); const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(PyArray_DATA(%(b)s));
dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + %(sm)s->strides[0] * i); dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_DATA(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
""" """
inside_row_loop = """ inside_row_loop = """
npy_intp Sx = %(x)s->strides[1]/sizeof(dtype_%(x)s); npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
npy_intp Sb = %(b)s->strides[0]/sizeof(dtype_%(b)s); npy_intp Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s); npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
size_t row_max_j=0; size_t row_max_j=0;
dtype_%(sm)s row_max = x_i[0] + b_i[0]; dtype_%(sm)s row_max = x_i[0] + b_i[0];
...@@ -263,34 +263,34 @@ class SoftmaxGrad(gof.Op): ...@@ -263,34 +263,34 @@ class SoftmaxGrad(gof.Op):
dy, sm = inp dy, sm = inp
dx, = out dx, = out
return ''' return '''
if ((%(dy)s->descr->type_num != PyArray_DOUBLE) && if ((PyArray_DESCR(%(dy)s)->type_num != NPY_DOUBLE) &&
(%(dy)s->descr->type_num != PyArray_FLOAT)) (PyArray_DESCR(%(dy)s)->type_num != NPY_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"types should be float or float64"); "types should be float or float64");
%(fail)s; %(fail)s;
} }
if ((%(sm)s->descr->type_num != PyArray_DOUBLE) && if ((PyArray_DESCR(%(sm)s)->type_num != NPY_DOUBLE) &&
(%(sm)s->descr->type_num != PyArray_FLOAT)) (PyArray_DESCR(%(sm)s)->type_num != NPY_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"types should be float or float64"); "types should be float or float64");
%(fail)s; %(fail)s;
} }
if ((%(dy)s->nd != 2) if ((PyArray_NDIM(%(dy)s) != 2)
|| (%(sm)s->nd != 2)) || (PyArray_NDIM(%(sm)s) != 2))
{ {
PyErr_SetString(PyExc_ValueError, "rank error"); PyErr_SetString(PyExc_ValueError, "rank error");
%(fail)s; %(fail)s;
} }
if (%(dy)s->dimensions[0] != %(sm)s->dimensions[0]) if (PyArray_DIMS(%(dy)s)[0] != PyArray_DIMS(%(sm)s)[0])
{ {
PyErr_SetString(PyExc_ValueError, "dy.shape[0] != sm.shape[0]"); PyErr_SetString(PyExc_ValueError, "dy.shape[0] != sm.shape[0]");
%(fail)s; %(fail)s;
} }
if ((NULL == %(dx)s) if ((NULL == %(dx)s)
|| (%(dx)s->dimensions[0] != %(sm)s->dimensions[0]) || (PyArray_DIMS(%(dx)s)[0] != PyArray_DIMS(%(sm)s)[0])
|| (%(dx)s->dimensions[1] != %(sm)s->dimensions[1])) || (PyArray_DIMS(%(dx)s)[1] != PyArray_DIMS(%(sm)s)[1]))
{ {
Py_XDECREF(%(dx)s); Py_XDECREF(%(dx)s);
%(dx)s = (PyArrayObject*) PyArray_SimpleNew(2, %(dx)s = (PyArrayObject*) PyArray_SimpleNew(2,
...@@ -304,22 +304,22 @@ class SoftmaxGrad(gof.Op): ...@@ -304,22 +304,22 @@ class SoftmaxGrad(gof.Op):
} }
} }
for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i) for (size_t i = 0; i < PyArray_DIMS(%(dx)s)[0]; ++i)
{ {
const dtype_%(dy)s* __restrict__ dy_i = (dtype_%(dy)s*) (%(dy)s->data + %(dy)s->strides[0] * i); const dtype_%(dy)s* __restrict__ dy_i = (dtype_%(dy)s*) (PyArray_DATA(%(dy)s) + PyArray_STRIDES(%(dy)s)[0] * i);
npy_intp Sdy = %(dy)s->strides[1]/sizeof(dtype_%(dy)s); npy_intp Sdy = PyArray_STRIDES(%(dy)s)[1]/sizeof(dtype_%(dy)s);
const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*) (%(sm)s->data + %(sm)s->strides[0] * i); const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*) (PyArray_DATA(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s); npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*) (%(dx)s->data + %(dx)s->strides[0] * i); dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*) (PyArray_DATA(%(dx)s) + PyArray_STRIDES(%(dx)s)[0] * i);
npy_intp Sdx = %(dx)s->strides[1]/sizeof(dtype_%(dx)s); npy_intp Sdx = PyArray_STRIDES(%(dx)s)[1]/sizeof(dtype_%(dx)s);
double sum_dy_times_sm = 0.; double sum_dy_times_sm = 0.;
for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j) for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j)
{ {
dx_i[j * Sdx] = dy_i[j * Sdy] * sm_i[j * Ssm]; dx_i[j * Sdx] = dy_i[j * Sdy] * sm_i[j * Ssm];
sum_dy_times_sm += dx_i[j * Sdx]; sum_dy_times_sm += dx_i[j * Sdx];
} }
for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j) for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j)
{ {
dx_i[j * Sdx] -= sum_dy_times_sm * sm_i[j * Ssm]; dx_i[j * Sdx] -= sum_dy_times_sm * sm_i[j * Ssm];
} }
...@@ -773,31 +773,31 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -773,31 +773,31 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
SoftmaxWithBias.c_code_template() SoftmaxWithBias.c_code_template()
return (init_decl, return (init_decl,
""" """
if (%(y_idx)s->nd != 1) if (PyArray_NDIM(%(y_idx)s) != 1)
{ {
PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor"); PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor");
%(fail)s; %(fail)s;
} }
if ((%(y_idx)s->descr->type_num != PyArray_INT64) if ((PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT64)
&& (%(y_idx)s->descr->type_num != PyArray_INT32) && (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT32)
&& (%(y_idx)s->descr->type_num != PyArray_INT16) && (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT16)
&& (%(y_idx)s->descr->type_num != PyArray_INT8)) && (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT8))
{ {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"y_idx not int8, int16, int32, or int64"); "y_idx not int8, int16, int32, or int64");
%(fail)s; %(fail)s;
} }
if (%(x)s->dimensions[0] != %(y_idx)s->dimensions[0]) if (PyArray_DIMS(%(x)s)[0] != PyArray_DIMS(%(y_idx)s)[0])
{ {
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"number of rows in x (%%ld) does not match length of y (%%ld)", "number of rows in x (%%ld) does not match length of y (%%ld)",
(long int)%(x)s->dimensions[0], (long int)PyArray_DIMS(%(x)s)[0],
(long int)%(y_idx)s->dimensions[0]); (long int)PyArray_DIMS(%(y_idx)s)[0]);
%(fail)s; %(fail)s;
} }
if ((NULL == %(nll)s) //initial condition if ((NULL == %(nll)s) //initial condition
|| (%(nll)s->dimensions[0] != %(y_idx)s->dimensions[0])) || (PyArray_DIMS(%(nll)s)[0] != PyArray_DIMS(%(y_idx)s)[0]))
{ {
if (NULL != %(nll)s) Py_XDECREF(%(nll)s); if (NULL != %(nll)s) Py_XDECREF(%(nll)s);
%(nll)s = (PyArrayObject*)PyArray_SimpleNew(1, %(nll)s = (PyArrayObject*)PyArray_SimpleNew(1,
...@@ -810,7 +810,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -810,7 +810,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
} }
} }
if ((NULL == %(am)s) if ((NULL == %(am)s)
|| (%(am)s->dimensions[0] != %(y_idx)s->dimensions[0])) || (PyArray_DIMS(%(am)s)[0] != PyArray_DIMS(%(y_idx)s)[0]))
{ {
Py_XDECREF(%(am)s); Py_XDECREF(%(am)s);
%(am)s = (PyArrayObject*) PyArray_SimpleNew(1, %(am)s = (PyArrayObject*) PyArray_SimpleNew(1,
...@@ -825,13 +825,13 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -825,13 +825,13 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
""", """,
begin_row_loop, begin_row_loop,
""" """
const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0]; const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(PyArray_DATA(%(y_idx)s) + PyArray_STRIDES(%(y_idx)s)[0] * i))[0];
dtype_%(nll) s* __restrict__ nll_i = (dtype_%(nll)s*)(%(nll)s->data + %(nll)s->strides[0] * i); dtype_%(nll) s* __restrict__ nll_i = (dtype_%(nll)s*)(PyArray_DATA(%(nll)s) + PyArray_STRIDES(%(nll)s)[0] * i);
%(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + %(am)s->strides[0] * i); %(am_type)s* __restrict__ am_i = (%(am_type)s*) (PyArray_DATA(%(am)s) + PyArray_STRIDES(%(am)s)[0] * i);
""", """,
inside_row_loop, inside_row_loop,
""" """
if ((y_i >= %(x)s->dimensions[1]) || (y_i < 0)) if ((y_i >= PyArray_DIMS(%(x)s)[1]) || (y_i < 0))
{ {
PyErr_SetString(PyExc_ValueError, "y_i value out of bounds"); PyErr_SetString(PyExc_ValueError, "y_i value out of bounds");
%(fail)s; %(fail)s;
...@@ -914,55 +914,55 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -914,55 +914,55 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
y_idx_type = node.inputs[2].type.dtype_specs()[1] y_idx_type = node.inputs[2].type.dtype_specs()[1]
return """ return """
if ((%(dnll)s->descr->type_num != PyArray_DOUBLE) && if ((PyArray_DESCR(%(dnll)s)->type_num != NPY_DOUBLE) &&
(%(dnll)s->descr->type_num != PyArray_FLOAT)) (PyArray_DESCR(%(dnll)s)->type_num != NPY_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"dnll type should be float32 or float64"); "dnll type should be float32 or float64");
%(fail)s; %(fail)s;
} }
if ((%(sm)s->descr->type_num != PyArray_DOUBLE) && if ((PyArray_DESCR(%(sm)s)->type_num != NPY_DOUBLE) &&
(%(sm)s->descr->type_num != PyArray_FLOAT)) (PyArray_DESCR(%(sm)s)->type_num != NPY_FLOAT))
{ {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"sm type should be float32 or float64"); "sm type should be float32 or float64");
%(fail)s; %(fail)s;
} }
if ((%(y_idx)s->descr->type_num != PyArray_INT64) if ((PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT64)
&& (%(y_idx)s->descr->type_num != PyArray_INT32) && (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT32)
&& (%(y_idx)s->descr->type_num != PyArray_INT16) && (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT16)
&& (%(y_idx)s->descr->type_num != PyArray_INT8)) && (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT8))
{ {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"y_idx not int8, int16, int32, or int64"); "y_idx not int8, int16, int32, or int64");
%(fail)s; %(fail)s;
} }
if ((%(dnll)s->nd != 1) if ((PyArray_NDIM(%(dnll)s) != 1)
|| (%(sm)s->nd != 2) || (PyArray_NDIM(%(sm)s) != 2)
|| (%(y_idx)s->nd != 1)) || (PyArray_NDIM(%(y_idx)s) != 1))
{ {
PyErr_SetString(PyExc_ValueError, "rank error"); PyErr_SetString(PyExc_ValueError, "rank error");
%(fail)s; %(fail)s;
} }
if (%(dnll)s->dimensions[0] != %(sm)s->dimensions[0]) if (PyArray_DIMS(%(dnll)s)[0] != PyArray_DIMS(%(sm)s)[0])
{ {
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"dnll.shape[0] (%%ld) != sm.shape[0] (%%ld)", "dnll.shape[0] (%%ld) != sm.shape[0] (%%ld)",
(long int)%(dnll)s->dimensions[0], (long int)PyArray_DIMS(%(dnll)s)[0],
(long int)%(sm)s->dimensions[0]); (long int)PyArray_DIMS(%(sm)s)[0]);
%(fail)s; %(fail)s;
} }
if (%(dnll)s->dimensions[0] != %(y_idx)s->dimensions[0]) if (PyArray_DIMS(%(dnll)s)[0] != PyArray_DIMS(%(y_idx)s)[0])
{ {
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"dnll.shape[0] (%%ld) != y_idx.shape[0] (%%ld)", "dnll.shape[0] (%%ld) != y_idx.shape[0] (%%ld)",
(long int)%(dnll)s->dimensions[0], (long int)PyArray_DIMS(%(dnll)s)[0],
(long int)%(y_idx)s->dimensions[0]); (long int)PyArray_DIMS(%(y_idx)s)[0]);
%(fail)s; %(fail)s;
} }
if ((NULL == %(dx)s) if ((NULL == %(dx)s)
|| (%(dx)s->dimensions[0] != %(sm)s->dimensions[0]) || (PyArray_DIMS(%(dx)s)[0] != PyArray_DIMS(%(sm)s)[0])
|| (%(dx)s->dimensions[1] != %(sm)s->dimensions[1])) || (PyArray_DIMS(%(dx)s)[1] != PyArray_DIMS(%(sm)s)[1]))
{ {
if (NULL != %(dx)s) Py_XDECREF(%(dx)s); if (NULL != %(dx)s) Py_XDECREF(%(dx)s);
%(dx)s = (PyArrayObject*) PyArray_SimpleNew(2, %(dx)s = (PyArrayObject*) PyArray_SimpleNew(2,
...@@ -975,23 +975,23 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -975,23 +975,23 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
} }
} }
for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i) for (size_t i = 0; i < PyArray_DIMS(%(dx)s)[0]; ++i)
{ {
const dtype_%(dnll)s dnll_i = ((dtype_%(dnll)s*)(%(dnll)s->data + %(dnll)s->strides[0] * i))[0]; const dtype_%(dnll)s dnll_i = ((dtype_%(dnll)s*)(PyArray_DATA(%(dnll)s) + PyArray_STRIDES(%(dnll)s)[0] * i))[0];
const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0]; const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(PyArray_DATA(%(y_idx)s) + PyArray_STRIDES(%(y_idx)s)[0] * i))[0];
const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + %(sm)s->strides[0] * i); const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_DATA(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s); npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*)(%(dx)s->data + %(dx)s->strides[0] * i); dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*)(PyArray_DATA(%(dx)s) + PyArray_STRIDES(%(dx)s)[0] * i);
npy_intp Sdx = %(dx)s->strides[1]/sizeof(dtype_%(dx)s); npy_intp Sdx = PyArray_STRIDES(%(dx)s)[1]/sizeof(dtype_%(dx)s);
for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j) for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j)
{ {
dx_i[j * Sdx] = dnll_i * sm_i[j * Ssm]; dx_i[j * Sdx] = dnll_i * sm_i[j * Ssm];
} }
if (y_i >= %(dx)s->dimensions[1]) if (y_i >= PyArray_DIMS(%(dx)s)[1])
{ {
PyErr_SetString(PyExc_ValueError, "y_i >= dx dimensions[1]"); PyErr_SetString(PyExc_ValueError, "y_i >= dx dimensions[1]");
%(fail)s; %(fail)s;
......
...@@ -619,15 +619,15 @@ class Shape_i(T.Op): ...@@ -619,15 +619,15 @@ class Shape_i(T.Op):
if isinstance(node.inputs[0].type, T.TensorType): if isinstance(node.inputs[0].type, T.TensorType):
return """ return """
if(!%(out)s) if(!%(out)s)
%(out)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, PyArray_INT64, 0); %(out)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(out)s))[0]=%(x)s->dimensions[%(i)s]; ((npy_int64*)PyArray_DATA(%(out)s))[0]=PyArray_DIMS(%(x)s)[%(i)s];
""" % locals() """ % locals()
elif node.inputs[0].type.__class__.__name__ == "CudaNdarrayType": elif node.inputs[0].type.__class__.__name__ == "CudaNdarrayType":
#Don't want to import cuda stuff here. #Don't want to import cuda stuff here.
return """ return """
if(!%(out)s) if(!%(out)s)
%(out)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, PyArray_INT64, 0); %(out)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(out)s))[0]= ((npy_int64*)PyArray_DATA(%(out)s))[0]=
CudaNdarray_HOST_DIMS(%(x)s)[%(i)s]; CudaNdarray_HOST_DIMS(%(x)s)[%(i)s];
""" % locals() """ % locals()
......
...@@ -176,13 +176,13 @@ class DownsampleFactorMax(Op): ...@@ -176,13 +176,13 @@ class DownsampleFactorMax(Op):
int x_shp0_usable; int x_shp0_usable;
int x_shp1_usable; int x_shp1_usable;
int z_shp0, z_shp1; int z_shp0, z_shp1;
if(%(x)s->nd!=4) if(PyArray_NDIM(%(x)s)!=4)
{ {
PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray"); PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray");
%(fail)s; %(fail)s;
} }
z_shp0 = %(x)s->dimensions[2] / %(ds0)s; z_shp0 = PyArray_DIMS(%(x)s)[2] / %(ds0)s;
z_shp1 = %(x)s->dimensions[3] / %(ds1)s; z_shp1 = PyArray_DIMS(%(x)s)[3] / %(ds1)s;
if (%(ignore_border)s) if (%(ignore_border)s)
{ {
x_shp0_usable = z_shp0 * %(ds0)s; x_shp0_usable = z_shp0 * %(ds0)s;
...@@ -190,23 +190,23 @@ class DownsampleFactorMax(Op): ...@@ -190,23 +190,23 @@ class DownsampleFactorMax(Op):
} }
else else
{ {
z_shp0 += (%(x)s->dimensions[2] %% %(ds0)s) ? 1 : 0; z_shp0 += (PyArray_DIMS(%(x)s)[2] %% %(ds0)s) ? 1 : 0;
z_shp1 += (%(x)s->dimensions[3] %% %(ds1)s) ? 1 : 0; z_shp1 += (PyArray_DIMS(%(x)s)[3] %% %(ds1)s) ? 1 : 0;
x_shp0_usable = %(x)s->dimensions[2]; x_shp0_usable = PyArray_DIMS(%(x)s)[2];
x_shp1_usable = %(x)s->dimensions[3]; x_shp1_usable = PyArray_DIMS(%(x)s)[3];
} }
if ((!%(z)s) if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4 || *PyArray_DIMS(%(z)s)!=4
||(%(z)s->dimensions[0] != %(x)s->dimensions[0]) ||(PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(x)s)[0])
||(%(z)s->dimensions[1] != %(x)s->dimensions[1]) ||(PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(x)s)[1])
||(%(z)s->dimensions[2] != z_shp0) ||(PyArray_DIMS(%(z)s)[2] != z_shp0)
||(%(z)s->dimensions[3] != z_shp1) ||(PyArray_DIMS(%(z)s)[3] != z_shp1)
) )
{ {
if (%(z)s) Py_XDECREF(%(z)s); if (%(z)s) Py_XDECREF(%(z)s);
npy_intp dims[4] = {0,0,0,0}; npy_intp dims[4] = {0,0,0,0};
dims[0]=%(x)s->dimensions[0]; dims[0]=PyArray_DIMS(%(x)s)[0];
dims[1]=%(x)s->dimensions[1]; dims[1]=PyArray_DIMS(%(x)s)[1];
dims[2]=z_shp0; dims[2]=z_shp0;
dims[3]=z_shp1; dims[3]=z_shp1;
%(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0); //TODO: zeros not necessary %(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0); //TODO: zeros not necessary
...@@ -214,8 +214,8 @@ class DownsampleFactorMax(Op): ...@@ -214,8 +214,8 @@ class DownsampleFactorMax(Op):
if (z_shp0 && z_shp1) if (z_shp0 && z_shp1)
{ {
for(int b=0;b<%(x)s->dimensions[0];b++){ for(int b=0;b<PyArray_DIMS(%(x)s)[0];b++){
for(int k=0;k<%(x)s->dimensions[1];k++){ for(int k=0;k<PyArray_DIMS(%(x)s)[1];k++){
int mini_i = 0; int mini_i = 0;
int zi = 0; int zi = 0;
for(int i=0;i< x_shp0_usable; i++){ for(int i=0;i< x_shp0_usable; i++){
...@@ -306,23 +306,23 @@ class DownsampleFactorMaxGrad(Op): ...@@ -306,23 +306,23 @@ class DownsampleFactorMaxGrad(Op):
PyErr_SetString(PyExc_ValueError, "input types must all match"); PyErr_SetString(PyExc_ValueError, "input types must all match");
%(fail)s; %(fail)s;
} }
if(%(x)s->nd!=4) if(PyArray_NDIM(%(x)s)!=4)
{ {
PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray"); PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray");
%(fail)s; %(fail)s;
} }
if(%(z)s->nd!=4) if(PyArray_NDIM(%(z)s)!=4)
{ {
PyErr_SetString(PyExc_ValueError, "z must be a 4d ndarray"); PyErr_SetString(PyExc_ValueError, "z must be a 4d ndarray");
%(fail)s; %(fail)s;
} }
if(%(gz)s->nd!=4) if(PyArray_NDIM(%(gz)s)!=4)
{ {
PyErr_SetString(PyExc_ValueError, "gz must be a 4d ndarray"); PyErr_SetString(PyExc_ValueError, "gz must be a 4d ndarray");
%(fail)s; %(fail)s;
} }
z_shp0 = %(z)s->dimensions[2]; z_shp0 = PyArray_DIMS(%(z)s)[2];
z_shp1 = %(z)s->dimensions[3]; z_shp1 = PyArray_DIMS(%(z)s)[3];
if (%(ignore_border)s) if (%(ignore_border)s)
{ {
x_shp0_usable = z_shp0 * %(ds0)s; x_shp0_usable = z_shp0 * %(ds0)s;
...@@ -330,23 +330,23 @@ class DownsampleFactorMaxGrad(Op): ...@@ -330,23 +330,23 @@ class DownsampleFactorMaxGrad(Op):
} }
else else
{ {
x_shp0_usable = %(x)s->dimensions[2]; x_shp0_usable = PyArray_DIMS(%(x)s)[2];
x_shp1_usable = %(x)s->dimensions[3]; x_shp1_usable = PyArray_DIMS(%(x)s)[3];
} }
if ((!%(gx)s) if ((!%(gx)s)
|| *PyArray_DIMS(%(gx)s)!=4 || *PyArray_DIMS(%(gx)s)!=4
||(%(gx)s->dimensions[0] != %(x)s->dimensions[0]) ||(PyArray_DIMS(%(gx)s)[0] != PyArray_DIMS(%(x)s)[0])
||(%(gx)s->dimensions[1] != %(x)s->dimensions[1]) ||(PyArray_DIMS(%(gx)s)[1] != PyArray_DIMS(%(x)s)[1])
||(%(gx)s->dimensions[2] != %(x)s->dimensions[2]) ||(PyArray_DIMS(%(gx)s)[2] != PyArray_DIMS(%(x)s)[2])
||(%(gx)s->dimensions[3] != %(x)s->dimensions[3]) ||(PyArray_DIMS(%(gx)s)[3] != PyArray_DIMS(%(x)s)[3])
) )
{ {
Py_XDECREF(%(gx)s); Py_XDECREF(%(gx)s);
%(gx)s = (PyArrayObject*) PyArray_ZEROS(4, %(x)s->dimensions, x_typenum,0); %(gx)s = (PyArrayObject*) PyArray_ZEROS(4, PyArray_DIMS(%(x)s), x_typenum,0);
} }
for(int b=0;b<%(x)s->dimensions[0];b++){ for(int b=0;b<PyArray_DIMS(%(x)s)[0];b++){
for(int k=0;k<%(x)s->dimensions[1];k++){ for(int k=0;k<PyArray_DIMS(%(x)s)[1];k++){
int mini_i = 0; int mini_i = 0;
int zi = 0; int zi = 0;
for(int i=0;i< x_shp0_usable; i++){ for(int i=0;i< x_shp0_usable; i++){
...@@ -364,14 +364,14 @@ class DownsampleFactorMaxGrad(Op): ...@@ -364,14 +364,14 @@ class DownsampleFactorMaxGrad(Op):
mini_i = (mini_i + 1 == %(ds0)s) ? 0 : mini_i+1; mini_i = (mini_i + 1 == %(ds0)s) ? 0 : mini_i+1;
zi += (mini_i == 0); zi += (mini_i == 0);
for (int j = x_shp1_usable; j < %(x)s->dimensions[3]; ++j) { for (int j = x_shp1_usable; j < PyArray_DIMS(%(x)s)[3]; ++j) {
dtype_%(gx)s * gxp = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s,b,k,i,j))); dtype_%(gx)s * gxp = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s,b,k,i,j)));
gxp[0] = 0; gxp[0] = 0;
} }
}//for i }//for i
for(int i = x_shp0_usable; i < %(x)s->dimensions[2]; i++){ for(int i = x_shp0_usable; i < PyArray_DIMS(%(x)s)[2]; i++){
for (int j = 0; j < %(x)s->dimensions[3]; ++j) { for (int j = 0; j < PyArray_DIMS(%(x)s)[3]; ++j) {
dtype_%(gx)s * gxp = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s,b,k,i,j))); dtype_%(gx)s * gxp = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s,b,k,i,j)));
gxp[0] = 0; gxp[0] = 0;
} }
......
...@@ -5846,11 +5846,12 @@ class test_arithmetic_cast(unittest.TestCase): ...@@ -5846,11 +5846,12 @@ class test_arithmetic_cast(unittest.TestCase):
config.int_division == 'floatX'): config.int_division == 'floatX'):
assert theano_dtype == config.floatX assert theano_dtype == config.floatX
continue continue
numpy_version =numpy.__version__.split('.')[:2]
if (cfg == 'numpy+floatX' and if (cfg == 'numpy+floatX' and
a_type == 'complex128' and a_type == 'complex128' and
b_type == 'float32' and b_type == 'float32' and
combo == ('scalar', 'array') and combo == ('scalar', 'array') and
numpy.__version__.startswith('1.6.') and bool(numpy_version >= [1, 6]) and
theano_dtype == 'complex128' and theano_dtype == 'complex128' and
numpy_dtypes == ['complex64', numpy_dtypes == ['complex64',
'complex64']): 'complex64']):
...@@ -5860,7 +5861,7 @@ class test_arithmetic_cast(unittest.TestCase): ...@@ -5860,7 +5861,7 @@ class test_arithmetic_cast(unittest.TestCase):
# in progress), so in the meantime we just # in progress), so in the meantime we just
# mark this test as a known failure. # mark this test as a known failure.
raise KnownFailureTest('Known issue with ' raise KnownFailureTest('Known issue with '
'numpy 1.6.x, see #761') 'numpy >= 1.6.x see #761')
# In any other situation: something wrong is # In any other situation: something wrong is
# going on! # going on!
......
...@@ -918,7 +918,7 @@ class T_fibby(unittest.TestCase): ...@@ -918,7 +918,7 @@ class T_fibby(unittest.TestCase):
return """ return """
Py_XDECREF(%(y)s); Py_XDECREF(%(y)s);
%(y)s = (PyArrayObject*)PyArray_FromArray( %(y)s = (PyArrayObject*)PyArray_FromArray(
%(x)s, 0, NPY_ENSURECOPY); %(x)s, 0, NPY_ARRAY_ENSURECOPY);
if (!(%y)s) %(fail)s; if (!(%y)s) %(fail)s;
dtype_%(y)s * y = (dtype_%(y)s*)%(y)s->data; dtype_%(y)s * y = (dtype_%(y)s*)%(y)s->data;
dtype_%(x)s * x = (dtype_%(x)s*)%(x)s->data; dtype_%(x)s * x = (dtype_%(x)s*)%(x)s->data;
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论