Commit 007540a5, authored by lamblin

Merge pull request #919 from nouiz/np17

Np17
......@@ -63,7 +63,7 @@ you should check the strides and alignment.
return """
Py_XDECREF(%(y)s);
%(y)s = (PyArrayObject*)PyArray_FromArray(
%(x)s, 0, NPY_ENSURECOPY);
%(x)s, 0, NPY_ARRAY_ENSURECOPY);
if (!(%y)s) %(fail)s;
dtype_%(y)s * y = (dtype_%(y)s*)%(y)s->data;
dtype_%(x)s * x = (dtype_%(x)s*)%(x)s->data;
......@@ -147,7 +147,7 @@ the correct size for the output. This is essentially simulating the line
Py_XDECREF(%(y)s);
%(y)s = (PyArrayObject*)PyArray_FromArray(
%(x)s, 0, NPY_ENSURECOPY);
%(x)s, 0, NPY_ARRAY_ENSURECOPY);
The first line reduces the reference count of the data that y originally
pointed to. The second line allocates the new data and makes y point to it.
......
......@@ -56,30 +56,30 @@ class BROKEN_ON_PURPOSE_Add(gof.Op):
a, b = inp
z, = out
return """
if (%(a)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a) != 1"); %(fail)s;}
if (%(b)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 1"); %(fail)s;}
if (PyArray_NDIM(%(a)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a) != 1"); %(fail)s;}
if (PyArray_NDIM(%(b)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 1"); %(fail)s;}
if (%(a)s->descr->type_num != PyArray_DOUBLE)
if (PyArray_DESCR(%(a)s)->type_num != NPY_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "a dtype not NPY_DOUBLE"); %(fail)s;}
if (%(b)s->descr->type_num != PyArray_DOUBLE)
if (PyArray_DESCR(%(b)s)->type_num != NPY_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "b's dtype not NPY_DOUBLE"); %(fail)s;}
if (%(a)s->dimensions[0] != %(b)s->dimensions[0])
if (PyArray_DIMS(%(a)s)[0] != PyArray_DIMS(%(b)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "a and b have different lengths"); %(fail)s;}
if ((!%(z)s)
|| (%(z)s->dimensions[0] != %(b)s->dimensions[0])
|| (PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(b)s)[0])
)
{
{Py_XDECREF(%(z)s);}
npy_intp dims[] = {0};
dims[0] = %(b)s->dimensions[0];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, %(b)s->descr->type_num);
dims[0] = PyArray_DIMS(%(b)s)[0];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, PyArray_DESCR(%(b)s)->type_num);
}
{
for (npy_intp m = 0; m < %(z)s->dimensions[0]; ++m)
for (npy_intp m = 0; m < PyArray_DIMS(%(z)s)[0]; ++m)
{
((double*)PyArray_GETPTR1(%(z)s, m))[0]
= 0.5
......@@ -150,13 +150,13 @@ class WeirdBrokenOp(gof.Op):
else:
z_code = """
{Py_XDECREF(%(z)s);}
%(z)s = (PyArrayObject*) PyArray_SimpleNew(1, %(a)s->dimensions, %(a)s->descr->type_num);
%(z)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(a)s), PyArray_DESCR(%(a)s)->type_num);
"""
prep_vars = """
//the output array has size M x N
npy_intp M = %(a)s->dimensions[0];
npy_intp Sa = %(a)s->strides[0] / %(a)s->descr->elsize;
npy_intp Sz = %(z)s->strides[0] / %(z)s->descr->elsize;
npy_intp M = PyArray_DIMS(%(a)s)[0];
npy_intp Sa = %(a)s->strides[0] / PyArray_DESCR(%(a)s)->elsize;
npy_intp Sz = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize;
npy_double * Da = (npy_double*)%(a)s->data;
npy_double * Dz = (npy_double*)%(z)s->data;
......@@ -603,22 +603,22 @@ class BrokenCImplementationAdd(gof.Op):
debug = 0
return """
//printf("executing c_code\\n");
if (%(a)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(a) != 2"); %(fail)s;}
if (%(b)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (PyArray_NDIM(%(a)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(a) != 2"); %(fail)s;}
if (PyArray_NDIM(%(b)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (%(a)s->descr->type_num != PyArray_FLOAT)
if (PyArray_DESCR(%(a)s)->type_num != NPY_FLOAT)
{PyErr_SetString(PyExc_NotImplementedError, "a dtype not NPY_FLOAT"); %(fail)s;}
if (%(b)s->descr->type_num != PyArray_FLOAT)
if (PyArray_DESCR(%(b)s)->type_num != NPY_FLOAT)
{PyErr_SetString(PyExc_NotImplementedError, "b's dtype not NPY_FLOAT"); %(fail)s;}
if (%(a)s->dimensions[0] != %(a)s->dimensions[1])
if (PyArray_DIMS(%(a)s)[0] != PyArray_DIMS(%(a)s)[1])
{PyErr_SetString(PyExc_NotImplementedError, "a is not square"); %(fail)s;}
if (%(b)s->dimensions[0] != %(b)s->dimensions[1])
if (PyArray_DIMS(%(b)s)[0] != PyArray_DIMS(%(b)s)[1])
{PyErr_SetString(PyExc_NotImplementedError, "b is not square"); %(fail)s;}
if (%(a)s->dimensions[0] != %(b)s->dimensions[0])
if (PyArray_DIMS(%(a)s)[0] != PyArray_DIMS(%(b)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "a and b have different dimensions"); %(fail)s;}
// We do not check for c_contiguous property here
......@@ -626,32 +626,32 @@ class BrokenCImplementationAdd(gof.Op):
{
if (!%(z)s)
printf("%(z)s is not there, %%p \\n", %(z)s);
else if (%(z)s->dimensions[0] != %(b)s->dimensions[0])
else if (PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(b)s)[0])
printf("Dimension 0 mismatch for %(z)s and %(b)s\\n");
else if (%(z)s->dimensions[1] != %(b)s->dimensions[1])
else if (PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(b)s)[1])
printf("Dimension 1 mismatch for %(z)s and %(b)s\\n");
else
printf("Reusing %(z)s\\n");
}
if ((!%(z)s)
|| (%(z)s->dimensions[0] != %(b)s->dimensions[0])
|| (%(z)s->dimensions[1] != %(b)s->dimensions[1])
|| (PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(b)s)[0])
|| (PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(b)s)[1])
)
{
Py_XDECREF(%(z)s);
npy_intp dims[] = {0, 0};
dims[0] = %(b)s->dimensions[0];
dims[1] = %(b)s->dimensions[1];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(b)s->descr->type_num);
dims[0] = PyArray_DIMS(%(b)s)[0];
dims[1] = PyArray_DIMS(%(b)s)[1];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, PyArray_DESCR(%(b)s)->type_num);
}
// Let us assume that %(z)s is c_contiguous
{
dtype_%(z)s * z = ((dtype_%(z)s*)(PyArray_GETPTR2(%(z)s,0,0)));
for (int i=0; i<%(b)s->dimensions[0]; i++)
for (int i=0; i<PyArray_DIMS(%(b)s)[0]; i++)
{
for (int j=0; j<%(b)s->dimensions[1]; j++)
for (int j=0; j<PyArray_DIMS(%(b)s)[1]; j++)
{
*z = ((float*)PyArray_GETPTR2(%(a)s, i, j))[0] +
((float*)PyArray_GETPTR2(%(b)s, i, j))[0] ;
......
......@@ -1453,6 +1453,22 @@ class GCC_compiler(object):
@staticmethod
def compile_args():
    """Return the list of extra flags to pass to the C++ compiler.

    The flags are taken from the space-separated ``config.gcc.cxxflags``
    setting, with empty entries dropped. When the installed NumPy is older
    than 1.7, ``-D`` definitions are appended that map the ``NPY_ARRAY_*``
    flag names onto the corresponding older ``NPY_*`` names.
    """
    cxxflags = []
    for flag in config.gcc.cxxflags.split(' '):
        # Consecutive spaces in the setting produce empty strings; skip them.
        if flag:
            cxxflags.append(flag)

    # NumPy 1.7 deprecates the old C API. Most, but not all, of the code
    # has been moved to the new API. Once that work is finished, enable the
    # following define so that no new code using the old API is introduced:
    # cxxflags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")

    # Only the major and minor components are needed for the comparison.
    major_minor = [int(part) for part in numpy.__version__.split('.')[:2]]

    # Per the original note, the NPY_ARRAY_* names did not exist before
    # NumPy 1.7, so alias them to the old names on older releases.
    if major_minor < [1, 7]:
        cxxflags.extend((
            "-D NPY_ARRAY_ENSURECOPY=NPY_ENSURECOPY",
            "-D NPY_ARRAY_ALIGNED=NPY_ALIGNED",
            "-D NPY_ARRAY_WRITEABLE=NPY_WRITEABLE",
            "-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL",
            "-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS",
            "-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS",
        ))
    return cxxflags
@staticmethod
......
......@@ -288,7 +288,8 @@ class TestComputeTestValue(unittest.TestCase):
# The second is a new message in numpy 1.6.
assert (str(e).startswith("shape mismatch") or
str(e).startswith("operands could not be broadcast "
"together with shapes"))
"together with shapes") or
str(e).startswith("could not broadcast input"))
finally:
theano.config.compute_test_value = orig_compute_test_value
......
......@@ -123,7 +123,7 @@ class GpuDot22Scalar(GpuOp):
fail = sub['fail']
return """
#define REAL float
float %(name)s_a = (%(a)s->descr->type_num == PyArray_FLOAT)
float %(name)s_a = (PyArray_TYPE(%(a)s) == NPY_FLOAT)
? (REAL)(((float*)%(a)s->data)[0])
: (REAL)(((double*)%(a)s->data)[0]);
#undef REAL
......@@ -231,11 +231,11 @@ class GpuGemm(GpuOp):
print >> sio, """
#define REAL float
float %(name)s_a = (%(a)s->descr->type_num == PyArray_FLOAT)
float %(name)s_a = (PyArray_TYPE(%(a)s) == NPY_FLOAT)
? (REAL)(((float*)%(a)s->data)[0])
: (REAL)(((double*)%(a)s->data)[0]);
float %(name)s_b = (%(b)s->descr->type_num == PyArray_FLOAT) ?
float %(name)s_b = (PyArray_TYPE(%(b)s) == NPY_FLOAT) ?
(REAL)(((float*)%(b)s->data)[0])
: (REAL)(((double*)%(b)s->data)[0]);
#undef REAL
......
......@@ -788,7 +788,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
PyErr_SetString(PyExc_TypeError, "CudaNdarray_TakeFrom: need a ndarray for indices with dtype int32");
return NULL;
}
if (((PyArrayObject*)indices_obj)->nd != 1) {
if (PyArray_NDIM(((PyArrayObject*)indices_obj)) != 1) {
PyErr_SetString(PyExc_TypeError, "CudaNdarray_TakeFrom: need a CudaNdarray of indices with only 1 dimensions");
return NULL;
}
......@@ -2921,7 +2921,7 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s
Py_DECREF(broadcastable);
return NULL;
}
for (int i = 0; i < data->nd; ++i)
for (int i = 0; i < PyArray_NDIM(data); ++i)
{
if ((data->dimensions[i] > 1) && PyInt_AsLong(PyTuple_GetItem(broadcastable, Py_ssize_t(i))))
{
......@@ -3102,7 +3102,7 @@ cublas_shutdown()
int
CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj)
{
int err = CudaNdarray_alloc_contiguous(self, obj->nd, obj->dimensions);
int err = CudaNdarray_alloc_contiguous(self, PyArray_NDIM(obj), obj->dimensions);
if (err) {
return err;
}
......
......@@ -214,7 +214,7 @@ class GpuImages2Neibs(Images2Neibs, GpuOp):
%(fail)s;
}
if (%(neib_shape)s->dimensions[0] != 2)
if (PyArray_DIMS(%(neib_shape)s)[0] != 2)
{
PyErr_Format(PyExc_ValueError,
"neib_shape has to contain two elements");
......
......@@ -153,7 +153,7 @@ class CURAND_Base(GpuOp):
%(ndim)s, %(size)s->dimensions[0]);
%(fail)s
}
if (%(size)s->descr->type_num != PyArray_INT32)
if (PyArray_DESCR(%(size)s)->type_num != NPY_INT32)
{
PyErr_SetString(PyExc_ValueError, "size must be int32");
%(fail)s
......
......@@ -55,31 +55,31 @@ class MultinomialFromUniform(Op):
fail = sub['fail']
return """
if (%(pvals)s->nd != 2)
if (PyArray_NDIM(%(pvals)s) != 2)
{
PyErr_Format(PyExc_TypeError, "pvals wrong rank");
%(fail)s;
}
if (%(unis)s->nd != 1)
if (PyArray_NDIM(%(unis)s) != 1)
{
PyErr_Format(PyExc_TypeError, "unis wrong rank");
%(fail)s;
}
if (%(unis)s->dimensions[0] != %(pvals)s->dimensions[0])
if (PyArray_DIMS(%(unis)s)[0] != PyArray_DIMS(%(pvals)s)[0])
{
PyErr_Format(PyExc_ValueError, "unis.shape[0] != pvals.shape[0]");
%(fail)s;
}
if ((NULL == %(z)s)
|| ((%(z)s->dimensions)[0] != (%(pvals)s->dimensions)[0])
|| ((%(z)s->dimensions)[1] != (%(pvals)s->dimensions)[1])
|| ((PyArray_DIMS(%(z)s))[0] != (PyArray_DIMS(%(pvals)s))[0])
|| ((PyArray_DIMS(%(z)s))[1] != (PyArray_DIMS(%(pvals)s))[1])
)
{
Py_XDECREF(%(z)s);
%(z)s = (PyArrayObject*) PyArray_ZEROS(2,
%(pvals)s->dimensions,
PyArray_DIMS(%(pvals)s),
type_num_%(z)s,
0);
if (!%(z)s)
......@@ -91,8 +91,8 @@ class MultinomialFromUniform(Op):
{ // NESTED SCOPE
const int nb_multi = %(pvals)s->dimensions[0];
const int nb_outcomes = %(pvals)s->dimensions[1];
const int nb_multi = PyArray_DIMS(%(pvals)s)[0];
const int nb_outcomes = PyArray_DIMS(%(pvals)s)[1];
//
// For each multinomial, loop over each possible outcome
......@@ -233,12 +233,12 @@ class GpuMultinomialFromUniform(MultinomialFromUniform, GpuOp):
fail = sub['fail']
return """
if (%(pvals)s->nd != 2)
if (PyArray_NDIM(%(pvals)s) != 2)
{
PyErr_Format(PyExc_TypeError, "pvals wrong rank");
%(fail)s;
}
if (%(unis)s->nd != 1)
if (PyArray_NDIM(%(unis)s) != 1)
{
PyErr_Format(PyExc_TypeError, "unis wrong rank");
%(fail)s;
......
......@@ -114,28 +114,28 @@ class Images2Neibs(Op):
int grid_c = -1; //number of patch in height
int grid_d = -1; //number of patch in width
{
if (%(ten4)s->nd != 4)
if (PyArray_NDIM(%(ten4)s) != 4)
{
PyErr_Format(PyExc_TypeError, "ten4 wrong rank");
%(fail)s;
}
if (%(neib_shape)s->nd != 1)
if (PyArray_NDIM(%(neib_shape)s) != 1)
{
PyErr_Format(PyExc_TypeError, "neib_shape wrong rank");
%(fail)s;
}
if ( (%(neib_shape)s->dimensions)[0] != 2)
if ( (PyArray_DIMS(%(neib_shape)s))[0] != 2)
{
PyErr_Format(PyExc_TypeError, "neib_shape wrong shape ; has to"
" contain 2 elements");
%(fail)s;
}
if (%(neib_step)s->nd != 1)
if (PyArray_NDIM(%(neib_step)s) != 1)
{
PyErr_Format(PyExc_TypeError, "neib_step wrong rank");
%(fail)s;
}
if ( (%(neib_step)s->dimensions)[0] != 2)
if ( (PyArray_DIMS(%(neib_step)s))[0] != 2)
{
PyErr_Format(PyExc_TypeError,
"neib_step wrong step ; has to contain 2 elements");
......@@ -154,33 +154,33 @@ class Images2Neibs(Op):
PyErr_Format(PyExc_TypeError, "Images2Neibs: in mode wrap_centered need patch with odd shapes");
%(fail)s;
}
if ( (%(ten4)s->dimensions)[2] < c || (%(ten4)s->dimensions)[3] < d)
if ( (PyArray_DIMS(%(ten4)s))[2] < c || (PyArray_DIMS(%(ten4)s))[3] < d)
{
PyErr_Format(PyExc_TypeError, "Images2Neibs: in wrap_centered mode, don't support image shapes smaller then the patch shapes: neib_shape=(%%ld,%%ld), ten4[2:]=[%%ld,%%ld]",
(long int)c, (long int)d, (long int)(%(ten4)s->dimensions[2]), (long int)(%(ten4)s->dimensions[3]));
(long int)c, (long int)d, (long int)(PyArray_DIMS(%(ten4)s)[2]), (long int)(PyArray_DIMS(%(ten4)s)[3]));
%(fail)s;
}
grid_c = CEIL_INTDIV(((%(ten4)s->dimensions)[2]),step_x);
grid_d = CEIL_INTDIV(((%(ten4)s->dimensions)[3]),step_y);
grid_c = CEIL_INTDIV(((PyArray_DIMS(%(ten4)s))[2]),step_x);
grid_d = CEIL_INTDIV(((PyArray_DIMS(%(ten4)s))[3]),step_y);
}else if ( "%(mode)s" == "valid") {
if ( ((%(ten4)s->dimensions)[2] < c) ||( (((%(ten4)s->dimensions)[2]-c) %% step_x)!=0))
if ( ((PyArray_DIMS(%(ten4)s))[2] < c) ||( (((PyArray_DIMS(%(ten4)s))[2]-c) %% step_x)!=0))
{
PyErr_Format(PyExc_TypeError, "neib_shape[0]=%%ld, neib_step[0]=%%ld and ten4.shape[2]=%%ld not consistent",
(long int)c, (long int)step_x, (long int)(%(ten4)s->dimensions[2]));
(long int)c, (long int)step_x, (long int)(PyArray_DIMS(%(ten4)s)[2]));
%(fail)s;
}
if ( ((%(ten4)s->dimensions)[3] < d) ||( (((%(ten4)s->dimensions)[3]-d) %% step_y)!=0))
if ( ((PyArray_DIMS(%(ten4)s))[3] < d) ||( (((PyArray_DIMS(%(ten4)s))[3]-d) %% step_y)!=0))
{
PyErr_Format(PyExc_TypeError, "neib_shape[1]=%%ld, neib_step[1]=%%ld and ten4.shape[3]=%%ld not consistent",
(long int)d, (long int)step_y, (long int)(%(ten4)s->dimensions[3]));
(long int)d, (long int)step_y, (long int)(PyArray_DIMS(%(ten4)s)[3]));
%(fail)s;
}
grid_c = 1+(((%(ten4)s->dimensions)[2]-c)/step_x); //number of patch in height
grid_d = 1+(((%(ten4)s->dimensions)[3]-d)/step_y); //number of patch in width
grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x); //number of patch in height
grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y); //number of patch in width
}else if ( "%(mode)s" == "ignore_borders") {
grid_c = 1+(((%(ten4)s->dimensions)[2]-c)/step_x); //number of patch in height
grid_d = 1+(((%(ten4)s->dimensions)[3]-d)/step_y); //number of patch in width
grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x); //number of patch in height
grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y); //number of patch in width
}else{
PyErr_Format(PyExc_TypeError, "Images2Neibs: unknow mode '%(mode)s'");
%(fail)s;
......@@ -190,12 +190,12 @@ class Images2Neibs(Op):
const npy_intp z_dim1 = c * d;
const npy_intp z_dim0 = grid_c
* grid_d
* (%(ten4)s->dimensions)[1]
* (%(ten4)s->dimensions)[0];
* (PyArray_DIMS(%(ten4)s))[1]
* (PyArray_DIMS(%(ten4)s))[0];
if ((NULL == %(z)s)
|| ((%(z)s->dimensions)[0] != z_dim0 )
|| ((%(z)s->dimensions)[1] != z_dim1 )
|| ((PyArray_DIMS(%(z)s))[0] != z_dim0 )
|| ((PyArray_DIMS(%(z)s))[1] != z_dim1 )
)
{
Py_XDECREF(%(z)s);
......@@ -218,10 +218,10 @@ class Images2Neibs(Op):
{ // NESTED SCOPE
const int nb_batch = (%(ten4)s->dimensions)[0];
const int nb_stack = (%(ten4)s->dimensions)[1];
const int height = (%(ten4)s->dimensions)[2];
const int width = (%(ten4)s->dimensions)[3];
const int nb_batch = (PyArray_DIMS(%(ten4)s))[0];
const int nb_stack = (PyArray_DIMS(%(ten4)s))[1];
const int height = (PyArray_DIMS(%(ten4)s))[2];
const int width = (PyArray_DIMS(%(ten4)s))[3];
// (c,d) = neib_shape
const npy_intp c = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0);
......
......@@ -220,9 +220,9 @@ class mrg_uniform(mrg_uniform_base):
rstate, size = inp
o_rstate, o_sample = out
if self.inplace:
o_rstate_requirement = 'NPY_C_CONTIGUOUS|NPY_ALIGNED'
o_rstate_requirement = 'NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_ALIGNED'
else:
o_rstate_requirement = 'NPY_ENSURECOPY|NPY_C_CONTIGUOUS|NPY_ALIGNED'
o_rstate_requirement = 'NPY_ARRAY_ENSURECOPY|NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_ALIGNED'
ndim = self.output_type.ndim
o_type_num = numpy.asarray(0, dtype=self.output_type.dtype).dtype.num
fail = sub['fail']
......@@ -241,7 +241,7 @@ class mrg_uniform(mrg_uniform_base):
int n_elements = 1;
int n_streams = 0;
int must_alloc_sample = ((NULL == %(o_sample)s)
|| (%(o_sample)s->nd != %(ndim)s)
|| (PyArray_NDIM(%(o_sample)s) != %(ndim)s)
|| !(PyArray_ISCONTIGUOUS(%(o_sample)s)));
%(otype)s * sample_data;
npy_int32 * state_data;
......@@ -261,18 +261,18 @@ class mrg_uniform(mrg_uniform_base):
const npy_int32 MASK2 = 65535; //2^16 - 1
const npy_int32 MULT2 = 21069;
if (%(size)s->nd != 1)
if (PyArray_NDIM(%(size)s) != 1)
{
PyErr_SetString(PyExc_ValueError, "size must be vector");
%(fail)s
}
if (%(size)s->dimensions[0] != %(ndim)s)
if (PyArray_DIMS(%(size)s)[0] != %(ndim)s)
{
PyErr_Format(PyExc_ValueError, "size must have length %%i (not %%i)",
%(ndim)s, int(%(size)s->dimensions[0]));
%(ndim)s, int(PyArray_DIMS(%(size)s)[0]));
%(fail)s
}
if (%(size)s->descr->type_num != PyArray_INT32)
if (PyArray_DESCR(%(size)s)->type_num != NPY_INT32)
{
PyErr_SetString(PyExc_ValueError, "size must be int32");
%(fail)s
......@@ -281,7 +281,7 @@ class mrg_uniform(mrg_uniform_base):
{
odims[i] = ((npy_int32*)(%(size)s->data + %(size)s->strides[0] * i))[0];
n_elements *= odims[i];
must_alloc_sample = must_alloc_sample || (%(o_sample)s->dimensions[i] != odims[i]);
must_alloc_sample = must_alloc_sample || (PyArray_DIMS(%(o_sample)s)[i] != odims[i]);
//fprintf(stderr, "size %%i %%i\\n", i, (int)odims[i]);
}
if (must_alloc_sample)
......@@ -296,22 +296,22 @@ class mrg_uniform(mrg_uniform_base):
Py_XDECREF(%(o_rstate)s);
%(o_rstate)s = (PyArrayObject*)PyArray_FromAny(py_%(rstate)s, NULL, 0, 0, %(o_rstate_requirement)s,NULL);
if (%(o_rstate)s->nd != 2)
if (PyArray_NDIM(%(o_rstate)s) != 2)
{
PyErr_SetString(PyExc_ValueError, "rstate must be matrix");
%(fail)s
}
if (%(o_rstate)s->dimensions[1] != 6)
if (PyArray_DIMS(%(o_rstate)s)[1] != 6)
{
PyErr_Format(PyExc_ValueError, "rstate must have 6 columns");
%(fail)s
}
if (%(o_rstate)s->descr->type_num != PyArray_INT32)
if (PyArray_DESCR(%(o_rstate)s)->type_num != NPY_INT32)
{
PyErr_SetString(PyExc_ValueError, "rstate must be int32");
%(fail)s
}
n_streams = %(o_rstate)s->dimensions[0];
n_streams = PyArray_DIMS(%(o_rstate)s)[0];
sample_data = (%(otype)s *) %(o_sample)s->data;
state_data = (npy_int32 *) %(o_rstate)s->data;
......@@ -501,20 +501,20 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
int must_alloc_sample = ((NULL == %(o_sample)s)
|| !CudaNdarray_Check(py_%(o_sample)s)
|| !CudaNdarray_is_c_contiguous(%(o_sample)s)
|| (%(o_sample)s->nd != %(ndim)s));
|| (PyArray_NDIM(%(o_sample)s) != %(ndim)s));
if (%(size)s->nd != 1)
if (PyArray_NDIM(%(size)s) != 1)
{
PyErr_SetString(PyExc_ValueError, "size must be vector");
%(fail)s
}
if (%(size)s->dimensions[0] != %(ndim)s)
if (PyArray_DIMS(%(size)s)[0] != %(ndim)s)
{
PyErr_Format(PyExc_ValueError, "size must have length %%i (not %%i)",
%(ndim)s, %(size)s->dimensions[0]);
%(ndim)s, PyArray_DIMS(%(size)s)[0]);
%(fail)s
}
if (%(size)s->descr->type_num != PyArray_INT32)
if (PyArray_DESCR(%(size)s)->type_num != NPY_INT32)
{
PyErr_SetString(PyExc_ValueError, "size must be int32");
%(fail)s
......@@ -552,7 +552,7 @@ class GPU_mrg_uniform(mrg_uniform_base, GpuOp):
%(o_rstate)s = (CudaNdarray*)CudaNdarray_Copy(%(rstate)s);
}
if (%(o_rstate)s->nd != 1)
if (PyArray_NDIM(%(o_rstate)s) != 1)
{
PyErr_SetString(PyExc_ValueError, "rstate must be vector");
%(fail)s;
......
......@@ -3006,38 +3006,38 @@ class StructuredDotGradCSC(gof.Op):
'g_ab')
return """
if (%(_d)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(d) != 2"); %(fail)s;}
if (%(_g)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(g) != 2"); %(fail)s;}
if (%(_indices)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
if (%(_indptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}
if (PyArray_NDIM(%(_d)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(d) != 2"); %(fail)s;}
if (PyArray_NDIM(%(_g)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(g) != 2"); %(fail)s;}
if (PyArray_NDIM(%(_indices)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
if (PyArray_NDIM(%(_indptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}
if( %(_indices)s->descr->type_num != PyArray_INT32) {
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( %(_indptr)s->descr->type_num != PyArray_INT32)
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if( %(_d)s->dimensions[1] != %(_g)s->dimensions[1])
if( PyArray_DIMS(%(_d)s)[1] != PyArray_DIMS(%(_g)s)[1])
{PyErr_SetString(PyExc_NotImplementedError, "d and g have different numbers of columns"); %(fail)s;}
if (!%(_zout)s
|| (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0]))
|| (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0]))
{
Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_indices)s->dimensions, %(_g)s->descr->type_num);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_g)s)->type_num);
}
{ //makes it compile even though labels jump over variable definitions.
npy_intp nnz = %(_indices)s->dimensions[0];
npy_intp N = %(_indptr)s->dimensions[0]-1; //TODO: error checking with this
npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; //TODO: error checking with this
npy_intp Sindices = %(_indices)s->strides[0]/%(_indices)s->descr->elsize;
npy_intp Sindptr = %(_indptr)s->strides[0]/%(_indptr)s->descr->elsize;
npy_intp Sindices = %(_indices)s->strides[0]/PyArray_DESCR(%(_indices)s)->elsize;
npy_intp Sindptr = %(_indptr)s->strides[0]/PyArray_DESCR(%(_indptr)s)->elsize;
const npy_intp Sd1 = %(_d)s->strides[1]/%(_d)s->descr->elsize;
const npy_intp Sg1 = %(_g)s->strides[1]/%(_g)s->descr->elsize;
const npy_intp Sd1 = %(_d)s->strides[1]/PyArray_DESCR(%(_d)s)->elsize;
const npy_intp Sg1 = %(_g)s->strides[1]/PyArray_DESCR(%(_g)s)->elsize;
const npy_intp K = %(_d)s->dimensions[1];
const npy_intp K = PyArray_DIMS(%(_d)s)[1];
const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data;
......@@ -3047,7 +3047,7 @@ class StructuredDotGradCSC(gof.Op):
{
// extract j-th row of dense matrix
const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(%(_d)s->data + %(_d)s->strides[0] * j);
if(j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
if(j >= PyArray_DIMS(%(_d)s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
// for each non-null value in the sparse column
for (npy_int32 i_idx = indptr[j * Sindptr]; i_idx < indptr[(j+1) * Sindptr]; ++i_idx)
......@@ -3062,7 +3062,7 @@ class StructuredDotGradCSC(gof.Op):
// make sure that row index is not bigger than actual number of rows
// Note: wouldn't the above operation fail if that were the case ?
// when would this ever be true anyway ?
if (i >= %(_g)s->dimensions[0])
if (i >= PyArray_DIMS(%(_g)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;}
// perform dot product of dense and sparse rows
......@@ -3142,39 +3142,39 @@ class StructuredDotGradCSR(gof.Op):
'g_ab')
return """
if (%(_d)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(d) != 2"); %(fail)s;}
if (%(_g)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(g) != 2"); %(fail)s;}
if (%(_indices)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
if (%(_indptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}
if (PyArray_NDIM(%(_d)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(d) != 2"); %(fail)s;}
if (PyArray_NDIM(%(_g)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(g) != 2"); %(fail)s;}
if (PyArray_NDIM(%(_indices)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
if (PyArray_NDIM(%(_indptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}
if( %(_indices)s->descr->type_num != PyArray_INT32) {
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( %(_indptr)s->descr->type_num != PyArray_INT32)
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if( %(_d)s->dimensions[1] != %(_g)s->dimensions[1])
if( PyArray_DIMS(%(_d)s)[1] != PyArray_DIMS(%(_g)s)[1])
{PyErr_SetString(PyExc_NotImplementedError, "d and g have different numbers of columns"); %(fail)s;}
if (!%(_zout)s
|| (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0]))
|| (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0]))
{
Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_indices)s->dimensions, %(_g)s->descr->type_num);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_g)s)->type_num);
}
{ //makes it compile even though labels jump over variable definitions.
npy_intp nnz = %(_indices)s->dimensions[0];
npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
// extract number of rows
npy_intp N = %(_indptr)s->dimensions[0]-1; //TODO: error checking with this
npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; //TODO: error checking with this
npy_intp Sindices = %(_indices)s->strides[0]/%(_indices)s->descr->elsize;
npy_intp Sindptr = %(_indptr)s->strides[0]/%(_indptr)s->descr->elsize;
npy_intp Sindices = %(_indices)s->strides[0]/PyArray_DESCR(%(_indices)s)->elsize;
npy_intp Sindptr = %(_indptr)s->strides[0]/PyArray_DESCR(%(_indptr)s)->elsize;
const npy_intp Sd1 = %(_d)s->strides[1]/%(_d)s->descr->elsize;
const npy_intp Sg1 = %(_g)s->strides[1]/%(_g)s->descr->elsize;
const npy_intp Sd1 = %(_d)s->strides[1]/PyArray_DESCR(%(_d)s)->elsize;
const npy_intp Sg1 = %(_g)s->strides[1]/PyArray_DESCR(%(_g)s)->elsize;
const npy_intp K = %(_d)s->dimensions[1];
const npy_intp K = PyArray_DIMS(%(_d)s)[1];
const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data;
......@@ -3190,7 +3190,7 @@ class StructuredDotGradCSR(gof.Op):
// extract j-th row of dense matrix
const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(%(_d)s->data + %(_d)s->strides[0] * j);
if(j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
if(j >= PyArray_DIMS(%(_d)s)[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
// extract corresponding row in gradient
const dtype_%(_g)s* __restrict__ g_row = (dtype_%(_g)s*)(%(_g)s->data + %(_g)s->strides[0] * i);
......@@ -3199,7 +3199,7 @@ class StructuredDotGradCSR(gof.Op):
// make sure that row index is not bigger than actual number of rows
// Note: wouldn't the above operation fail if that were the case ?
// when would this ever be true anyway ?
if (i >= %(_g)s->dimensions[0])
if (i >= PyArray_DIMS(%(_g)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;}
// perform dot product of dense and sparse rows
......
......@@ -112,59 +112,59 @@ class StructuredDotCSC(gof.Op):
rval = """
if (%(a_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;}
if (%(a_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
if (%(a_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (%(a_nrows)s->nd != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;}
if (%(b)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (PyArray_NDIM(%(a_val)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;}
if (PyArray_NDIM(%(a_ind)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
if (PyArray_NDIM(%(a_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (PyArray_NDIM(%(a_nrows)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;}
if (PyArray_NDIM(%(b)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (%(a_val)s->descr->type_num != %(typenum_a_val)s) {
if (PyArray_DESCR(%(a_val)s)->type_num != %(typenum_a_val)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for a_val"); %(fail)s;}
if (%(b)s->descr->type_num != %(typenum_b)s) {
if (PyArray_DESCR(%(b)s)->type_num != %(typenum_b)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for b"); %(fail)s;}
if (%(a_ind)s->descr->type_num != PyArray_INT32) {
if (PyArray_DESCR(%(a_ind)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (%(a_ptr)s->descr->type_num != PyArray_INT32)
if (PyArray_DESCR(%(a_ptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (%(a_nrows)s->descr->type_num != PyArray_INT32)
if (PyArray_DESCR(%(a_nrows)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32"); %(fail)s;}
if (%(a_val)s->dimensions[0] != %(a_ind)s->dimensions[0])
if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "a_val and a_ind have different lengths"); %(fail)s;}
if (%(a_ptr)s->dimensions[0] != %(b)s->dimensions[0]+1)
if (PyArray_DIMS(%(a_ptr)s)[0] != PyArray_DIMS(%(b)s)[0]+1)
{PyErr_SetString(PyExc_NotImplementedError, "a's number of columns doesn't match b's rows"); %(fail)s;}
if ((!%(z)s)
|| (%(z)s->dimensions[0] != ((npy_int32 *)%(a_nrows)s->data)[0])
|| (%(z)s->dimensions[1] != %(b)s->dimensions[1])
|| (PyArray_DIMS(%(z)s)[0] != ((npy_int32 *)%(a_nrows)s->data)[0])
|| (PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(b)s)[1])
)
{
{Py_XDECREF(%(z)s);}
npy_intp dims[] = {0, 0};
dims[0] = ((npy_int32 *)%(a_nrows)s->data)[0];
dims[1] = %(b)s->dimensions[1];
dims[1] = PyArray_DIMS(%(b)s)[1];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s);
}
{
// sparse array has size MxK, dense KxN, output MxN
npy_intp M = %(z)s->dimensions[0];
npy_intp N = %(z)s->dimensions[1];
npy_intp K = %(b)s->dimensions[0];
npy_intp M = PyArray_DIMS(%(z)s)[0];
npy_intp N = PyArray_DIMS(%(z)s)[1];
npy_intp K = PyArray_DIMS(%(b)s)[0];
// strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm = %(z)s->strides[0] / %(z)s->descr->elsize;
npy_intp Szn = %(z)s->strides[1] / %(z)s->descr->elsize;
//npy_intp Sbm = %(b)s->strides[0] / %(b)s->descr->elsize;
npy_intp Sbn = %(b)s->strides[1] / %(b)s->descr->elsize;
npy_intp Sval = %(a_val)s->strides[0] / %(a_val)s->descr->elsize;
npy_intp Sind = %(a_ind)s->strides[0] / %(a_ind)s->descr->elsize;
npy_intp Sptr = %(a_ptr)s->strides[0] / %(a_ptr)s->descr->elsize;
npy_intp Szm = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Szn = %(z)s->strides[1] / PyArray_DESCR(%(z)s)->elsize;
//npy_intp Sbm = %(b)s->strides[0] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sbn = %(b)s->strides[1] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sval = %(a_val)s->strides[0] / PyArray_DESCR(%(a_val)s)->elsize;
npy_intp Sind = %(a_ind)s->strides[0] / PyArray_DESCR(%(a_ind)s)->elsize;
npy_intp Sptr = %(a_ptr)s->strides[0] / PyArray_DESCR(%(a_ptr)s)->elsize;
// pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data;
......@@ -172,7 +172,7 @@ class StructuredDotCSC(gof.Op):
const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data;
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data;
//npy_intp nnz = %(a_ind)s->dimensions[0];
//npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0];
//clear the output array
memset(Dz, 0, M*N*sizeof(dtype_%(z)s));
......@@ -208,7 +208,7 @@ class StructuredDotCSC(gof.Op):
dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * m);
//RESOLVE: a.shape[0] equals z.shape[0], why is this not an equality constraint?
if (m >= %(z)s->dimensions[0])
if (m >= PyArray_DIMS(%(z)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "illegal row index in a"); %(fail)s;}
// loop over final dimension (cols of dense matrix) and perform dot product
......@@ -301,46 +301,46 @@ class StructuredDotCSR(gof.Op):
raise NotImplementedError('Complex types are not supported for b')
return """
if (%(a_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;}
if (%(a_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
if (%(a_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (%(b)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (PyArray_NDIM(%(a_val)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;}
if (PyArray_NDIM(%(a_ind)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
if (PyArray_NDIM(%(a_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (PyArray_NDIM(%(b)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (%(a_ind)s->descr->type_num != PyArray_INT32) {
if (PyArray_DESCR(%(a_ind)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (%(a_ptr)s->descr->type_num != PyArray_INT32)
if (PyArray_DESCR(%(a_ptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (%(a_val)s->dimensions[0] != %(a_ind)s->dimensions[0])
if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "a_val and a_ind have different lengths"); %(fail)s;}
if ((!%(z)s)
|| (%(z)s->dimensions[0] != %(a_ptr)s->dimensions[0]-1) //a's rows
|| (%(z)s->dimensions[1] != %(b)s->dimensions[1]) //b's columns
|| (PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(a_ptr)s)[0]-1) //a's rows
|| (PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(b)s)[1]) //b's columns
)
{
{Py_XDECREF(%(z)s);}
npy_intp dims[] = {0, 0};
dims[0] = %(a_ptr)s->dimensions[0]-1;
dims[1] = %(b)s->dimensions[1];
dims[0] = PyArray_DIMS(%(a_ptr)s)[0]-1;
dims[1] = PyArray_DIMS(%(b)s)[1];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s);
}
{
// sparse array has size MxK, dense KxN, output MxN
npy_intp M = %(z)s->dimensions[0];
npy_intp N = %(z)s->dimensions[1];
npy_intp K = %(b)s->dimensions[0];
npy_intp M = PyArray_DIMS(%(z)s)[0];
npy_intp N = PyArray_DIMS(%(z)s)[1];
npy_intp K = PyArray_DIMS(%(b)s)[0];
// strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm = %(z)s->strides[0] / %(z)s->descr->elsize;
npy_intp Szn = %(z)s->strides[1] / %(z)s->descr->elsize;
npy_intp Sbm = %(b)s->strides[0] / %(b)s->descr->elsize;
npy_intp Sbn = %(b)s->strides[1] / %(b)s->descr->elsize;
npy_intp Sval = %(a_val)s->strides[0] / %(a_val)s->descr->elsize;
npy_intp Sind = %(a_ind)s->strides[0] / %(a_ind)s->descr->elsize;
npy_intp Sptr = %(a_ptr)s->strides[0] / %(a_ptr)s->descr->elsize;
npy_intp Szm = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Szn = %(z)s->strides[1] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Sbm = %(b)s->strides[0] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sbn = %(b)s->strides[1] / PyArray_DESCR(%(b)s)->elsize;
npy_intp Sval = %(a_val)s->strides[0] / PyArray_DESCR(%(a_val)s)->elsize;
npy_intp Sind = %(a_ind)s->strides[0] / PyArray_DESCR(%(a_ind)s)->elsize;
npy_intp Sptr = %(a_ptr)s->strides[0] / PyArray_DESCR(%(a_ptr)s)->elsize;
// pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data;
......@@ -348,7 +348,7 @@ class StructuredDotCSR(gof.Op):
const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data;
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data;
//npy_intp nnz = %(a_ind)s->dimensions[0];
//npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0];
//clear the output array
memset(Dz, 0, M*N*sizeof(dtype_%(z)s));
......@@ -535,55 +535,56 @@ class UsmmCscDense(gof.Op):
inplace = int(self.inplace)
rval = """
if (%(x_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_val) != 1"); %(fail)s;}
if (%(x_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_ind) != 1"); %(fail)s;}
if (%(x_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_ptr) != 1"); %(fail)s;}
if (%(x_nrows)s->nd != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_nrows) != 0"); %(fail)s;}
if (%(y)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
if (%(x_val)s->descr->type_num != %(typenum_x_val)s) {
if (PyArray_NDIM(%(x_val)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_val) != 1"); %(fail)s;}
if (PyArray_NDIM(%(x_ind)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_ind) != 1"); %(fail)s;}
if (PyArray_NDIM(%(x_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_ptr) != 1"); %(fail)s;}
if (PyArray_NDIM(%(x_nrows)s) != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;}
if (PyArray_NDIM(%(y)s) != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
if (PyArray_DESCR(%(x_val)s)->type_num != %(typenum_x_val)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x_val"); %(fail)s;}
if (%(y)s->descr->type_num != %(typenum_y)s) {
if (PyArray_DESCR(%(y)s)->type_num != %(typenum_y)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y"); %(fail)s;}
if (%(z)s->descr->type_num != %(typenum_z)s) {
if (PyArray_DESCR(%(z)s)->type_num != %(typenum_z)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for z"); %(fail)s;}
if (%(alpha)s->descr->type_num != %(typenum_alpha)s) {
if (PyArray_DESCR(%(alpha)s)->type_num != %(typenum_alpha)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for alpha"); %(fail)s;}
if (%(x_ind)s->descr->type_num != PyArray_INT32) {
if (PyArray_DESCR(%(x_ind)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "x_ind dtype not INT32"); %(fail)s;}
if (%(x_ptr)s->descr->type_num != PyArray_INT32)
if (PyArray_DESCR(%(x_ptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "x_ptr dtype not INT32"); %(fail)s;}
if (%(x_nrows)s->descr->type_num != PyArray_INT32)
if (PyArray_DESCR(%(x_nrows)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "x_nrows dtype not INT32"); %(fail)s;}
if (%(x_val)s->dimensions[0] != %(x_ind)s->dimensions[0])
if (PyArray_DIMS(%(x_val)s)[0] != PyArray_DIMS(%(x_ind)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "x_val and x_ind have different lengths"); %(fail)s;}
if (%(x_ptr)s->dimensions[0] != %(y)s->dimensions[0]+1)
if (PyArray_DIMS(%(x_ptr)s)[0] != PyArray_DIMS(%(y)s)[0]+1)
{PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows"); %(fail)s;}
if (%(z)s->dimensions[0] != ((npy_int32 *)%(x_nrows)s->data)[0] || %(z)s->dimensions[1] != %(y)s->dimensions[1])
if (PyArray_DIMS(%(z)s)[0] != ((npy_int32 *)%(x_nrows)s->data)[0] || PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(y)s)[1])
{PyErr_SetString(PyExc_NotImplementedError, "The dimension of the allocated output doesn't match the correct output size."); %(fail)s;}
if (PyArray_SIZE(%(alpha)s) != 1)
{PyErr_SetString(PyExc_NotImplementedError, "The number of element in alpha must be 1"); %(fail)s;}
if (%(alpha)s->nd != 2)
if (PyArray_NDIM(%(alpha)s) != 2)
{PyErr_SetString(PyExc_NotImplementedError, "The number dimension of alpha must be 2"); %(fail)s;}
if (%(x_val)s->nd != 1)
if (PyArray_NDIM(%(x_val)s) != 1)
{PyErr_SetString(PyExc_NotImplementedError, "The number dimension of x_val must be 1"); %(fail)s;}
if (%(y)s->nd != 2)
if (PyArray_NDIM(%(y)s) != 2)
{PyErr_SetString(PyExc_NotImplementedError, "The number dimension of y must be 2"); %(fail)s;}
if (%(z)s->nd != 2)
if (PyArray_NDIM(%(z)s) != 2)
{PyErr_SetString(PyExc_NotImplementedError, "The number dimension of z must be 2"); %(fail)s;}
if (%(inplace)s)
......@@ -596,22 +597,22 @@ class UsmmCscDense(gof.Op):
Py_INCREF(%(zn)s);
}
else if (!%(zn)s
|| (%(zn)s->dimensions[0] != ((npy_int32 *)%(x_nrows)s->data)[0])
|| (%(zn)s->dimensions[1] != %(y)s->dimensions[1])
|| (PyArray_DIMS(%(zn)s)[0] != ((npy_int32 *)%(x_nrows)s->data)[0])
|| (PyArray_DIMS(%(zn)s)[1] != PyArray_DIMS(%(y)s)[1])
)
{
{Py_XDECREF(%(zn)s);}
npy_intp dims[] = {0, 0};
dims[0] = ((npy_int32 *)%(x_nrows)s->data)[0];
dims[1] = %(y)s->dimensions[1];
dims[1] = PyArray_DIMS(%(y)s)[1];
%(zn)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_zn)s);
}
{
// sparse array has size MxK, dense KxN, output MxN
npy_intp M = %(zn)s->dimensions[0];
npy_intp N = %(zn)s->dimensions[1];
npy_intp K = %(y)s->dimensions[0];
npy_intp M = PyArray_DIMS(%(zn)s)[0];
npy_intp N = PyArray_DIMS(%(zn)s)[1];
npy_intp K = PyArray_DIMS(%(y)s)[0];
// pointers to access actual data in the arrays passed as params.
const dtype_%(x_val)s* __restrict__ Dval = (dtype_%(x_val)s*)%(x_val)s->data;
......@@ -619,12 +620,12 @@ class UsmmCscDense(gof.Op):
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(x_ptr)s->data;
const dtype_%(alpha)s alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0];
npy_intp Sz = %(z)s->strides[1] / %(z)s->descr->elsize;
npy_intp Szn = %(zn)s->strides[1] / %(zn)s->descr->elsize;
npy_intp Sval = %(x_val)s->strides[0] / %(x_val)s->descr->elsize;
npy_intp Sind = %(x_ind)s->strides[0] / %(x_ind)s->descr->elsize;
npy_intp Sptr = %(x_ptr)s->strides[0] / %(x_ptr)s->descr->elsize;
npy_intp Sy = %(y)s->strides[1] / %(y)s->descr->elsize;
npy_intp Sz = %(z)s->strides[1] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Szn = %(zn)s->strides[1] / PyArray_DESCR(%(zn)s)->elsize;
npy_intp Sval = %(x_val)s->strides[0] / PyArray_DESCR(%(x_val)s)->elsize;
npy_intp Sind = %(x_ind)s->strides[0] / PyArray_DESCR(%(x_ind)s)->elsize;
npy_intp Sptr = %(x_ptr)s->strides[0] / PyArray_DESCR(%(x_ptr)s)->elsize;
npy_intp Sy = %(y)s->strides[1] / PyArray_DESCR(%(y)s)->elsize;
if (!(%(inplace)s))
......@@ -742,58 +743,58 @@ class CSMGradC(gof.Op):
raise NotImplementedError('Complex types are not supported for b_val')
return """
if (%(a_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;}
if (%(a_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
if (%(a_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (%(b_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_val) != 1"); %(fail)s;}
if (%(b_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ind) != 1"); %(fail)s;}
if (%(b_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ptr) != 1"); %(fail)s;}
if (%(a_ind)s->descr->type_num != PyArray_INT32) {
if (PyArray_NDIM(%(a_val)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;}
if (PyArray_NDIM(%(a_ind)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
if (PyArray_NDIM(%(a_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (PyArray_NDIM(%(b_val)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_val) != 1"); %(fail)s;}
if (PyArray_NDIM(%(b_ind)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ind) != 1"); %(fail)s;}
if (PyArray_NDIM(%(b_ptr)s) != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ptr) != 1"); %(fail)s;}
if (PyArray_DESCR(%(a_ind)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (%(a_ptr)s->descr->type_num != PyArray_INT32)
if (PyArray_DESCR(%(a_ptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (%(b_ind)s->descr->type_num != PyArray_INT32) {
if (PyArray_DESCR(%(b_ind)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "b_ind dtype not INT32"); %(fail)s;}
if (%(b_ptr)s->descr->type_num != PyArray_INT32)
if (PyArray_DESCR(%(b_ptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "b_ptr dtype not INT32"); %(fail)s;}
if (%(a_val)s->dimensions[0] != %(a_ind)s->dimensions[0])
if (PyArray_DIMS(%(a_val)s)[0] != PyArray_DIMS(%(a_ind)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "a_val and a_ind have different lengths"); %(fail)s;}
if (%(b_val)s->dimensions[0] != %(b_ind)s->dimensions[0])
if (PyArray_DIMS(%(b_val)s)[0] != PyArray_DIMS(%(b_ind)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "b_val and b_ind have different lengths"); %(fail)s;}
if (%(a_ptr)s->dimensions[0] != %(b_ptr)s->dimensions[0])
if (PyArray_DIMS(%(a_ptr)s)[0] != PyArray_DIMS(%(b_ptr)s)[0])
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr and b_ptr have different lengths"); %(fail)s;}
if ((!%(z)s) || (%(z)s->dimensions[0] != %(a_val)s->dimensions[0]))
if ((!%(z)s) || (PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(a_val)s)[0]))
{
{Py_XDECREF(%(z)s);}
npy_intp dims[] = {0};
dims[0] = %(a_val)s->dimensions[0];
dims[0] = PyArray_DIMS(%(a_val)s)[0];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, %(typenum_z)s);
}
{
// sparse array has size MxK, dense KxN, output MxN
npy_intp M = %(a_ptr)s->dimensions[0] - 1;
npy_intp M = PyArray_DIMS(%(a_ptr)s)[0] - 1;
npy_intp a_dim_0 = ((npy_int32 *)%(a_dim)s->data)[0];
npy_intp a_dim_1 = ((npy_int32 *)%(a_dim)s->data)[1];
npy_intp sp_dim = (M == a_dim_0)?a_dim_1:a_dim_0;
// strides tell you how many bytes to skip to go to next column/row entry
npy_intp Sz = %(z)s->strides[0] / %(z)s->descr->elsize;
npy_intp Sa_val = %(a_val)s->strides[0] / %(a_val)s->descr->elsize;
npy_intp Sa_ind = %(a_ind)s->strides[0] / %(a_ind)s->descr->elsize;
npy_intp Sa_ptr = %(a_ptr)s->strides[0] / %(a_ptr)s->descr->elsize;
npy_intp Sb_val = %(b_val)s->strides[0] / %(b_val)s->descr->elsize;
npy_intp Sb_ind = %(b_ind)s->strides[0] / %(b_ind)s->descr->elsize;
npy_intp Sb_ptr = %(b_ptr)s->strides[0] / %(b_ptr)s->descr->elsize;
npy_intp Sz = %(z)s->strides[0] / PyArray_DESCR(%(z)s)->elsize;
npy_intp Sa_val = %(a_val)s->strides[0] / PyArray_DESCR(%(a_val)s)->elsize;
npy_intp Sa_ind = %(a_ind)s->strides[0] / PyArray_DESCR(%(a_ind)s)->elsize;
npy_intp Sa_ptr = %(a_ptr)s->strides[0] / PyArray_DESCR(%(a_ptr)s)->elsize;
npy_intp Sb_val = %(b_val)s->strides[0] / PyArray_DESCR(%(b_val)s)->elsize;
npy_intp Sb_ind = %(b_ind)s->strides[0] / PyArray_DESCR(%(b_ind)s)->elsize;
npy_intp Sb_ptr = %(b_ptr)s->strides[0] / PyArray_DESCR(%(b_ptr)s)->elsize;
// pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data;
......@@ -804,7 +805,7 @@ class CSMGradC(gof.Op):
const npy_int32 * __restrict__ Db_ind = (npy_int32*)%(b_ind)s->data;
const npy_int32 * __restrict__ Db_ptr = (npy_int32*)%(b_ptr)s->data;
npy_intp nnz = %(a_ind)s->dimensions[0];
npy_intp nnz = PyArray_DIMS(%(a_ind)s)[0];
dtype_%(b_val)s b_row[sp_dim];
......@@ -897,32 +898,32 @@ class MulSDCSC(gof.Op):
raise NotImplementedError('Complex types are not supported for b')
return """
if (%(_b)s->nd != 2) {
if (PyArray_NDIM(%(_b)s) != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2");
%(fail)s;}
if (%(_data)s->nd != 1) {
if (PyArray_NDIM(%(_data)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(data) != 1");
%(fail)s;}
if (%(_indices)s->nd != 1) {
if (PyArray_NDIM(%(_indices)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)s;}
if (%(_indptr)s->nd != 1) {
if (PyArray_NDIM(%(_indptr)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)s;}
if( %(_indices)s->descr->type_num != PyArray_INT32) {
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( %(_indptr)s->descr->type_num != PyArray_INT32)
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s)
{
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_indices)s->dimensions, %(_b)s->descr->type_num);
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num);
}
if (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0])
if (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0])
{
PyErr_SetString(PyExc_NotImplementedError,
"somehow _zout got the wrong size.. and I don't know how to resize it.");
......@@ -930,9 +931,9 @@ class MulSDCSC(gof.Op):
}
{ //makes it compile even though labels jump over variable definitions.
const npy_intp nnz = %(_indices)s->dimensions[0];
const npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
//TODO: error checking with this
const npy_intp N = %(_indptr)s->dimensions[0]-1;
const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
......@@ -1011,32 +1012,32 @@ class MulSDCSR(gof.Op):
raise NotImplementedError('Complex types are not supported for b')
return """
if (%(_b)s->nd != 2) {
if (PyArray_NDIM(%(_b)s) != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2");
%(fail)s;}
if (%(_data)s->nd != 1) {
if (PyArray_NDIM(%(_data)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(data) != 1");
%(fail)s;}
if (%(_indices)s->nd != 1) {
if (PyArray_NDIM(%(_indices)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)s;}
if (%(_indptr)s->nd != 1) {
if (PyArray_NDIM(%(_indptr)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)s;}
if( %(_indices)s->descr->type_num != PyArray_INT32) {
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( %(_indptr)s->descr->type_num != PyArray_INT32)
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s)
{
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_indices)s->dimensions, %(_b)s->descr->type_num);
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num);
}
if (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0])
if (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0])
{
PyErr_SetString(PyExc_NotImplementedError,
"somehow _zout got the wrong size.. and I don't know how to resize it.");
......@@ -1044,9 +1045,9 @@ class MulSDCSR(gof.Op):
}
{ //makes it compile even though labels jump over variable definitions.
const npy_intp nnz = %(_indices)s->dimensions[0];
const npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
//TODO: error checking with this
const npy_intp N = %(_indptr)s->dimensions[0]-1;
const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
......@@ -1161,42 +1162,42 @@ class MulSVCSR(gof.Op):
raise NotImplementedError('Complex types are not supported for b')
return """
if (%(_b)s->nd != 1) {
if (PyArray_NDIM(%(_b)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 1");
%(fail)s;
}
if (%(_data)s->nd != 1) {
if (PyArray_NDIM(%(_data)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(data) != 1");
%(fail)s;
}
if (%(_indices)s->nd != 1) {
if (PyArray_NDIM(%(_indices)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)s;
}
if (%(_indptr)s->nd != 1) {
if (PyArray_NDIM(%(_indptr)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)s;
}
if( %(_indices)s->descr->type_num != PyArray_INT32) {
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( %(_indptr)s->descr->type_num != PyArray_INT32)
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s
|| %(_zout)s->dimensions[0] != %(_indices)s->dimensions[0]
|| PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0]
|| !PyArray_ISCONTIGUOUS(%(_zout)s))
{
Py_XDECREF(%(_zout)s);
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_indices)s->dimensions, %(_b)s->descr->type_num);
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num);
}
{ //makes it compile even though labels jump over variable definitions.
const npy_intp nnz = %(_indices)s->dimensions[0];
const npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
//TODO: error checking with this
const npy_intp N = %(_indptr)s->dimensions[0]-1;
const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
......@@ -1206,7 +1207,7 @@ class MulSVCSR(gof.Op):
dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data;
const npy_intp Sb = %(_b)s->strides[0] / %(_b)s->descr->elsize;
const npy_intp Sb = %(_b)s->strides[0] / PyArray_DESCR(%(_b)s)->elsize;
// loop over rows
for (npy_int32 j = 0; j < N; ++j)
......@@ -1311,36 +1312,36 @@ class StructuredAddSVCSR(gof.Op):
raise NotImplementedError('Complex types are not supported for b')
return """
if (%(_b)s->nd != 1) {
if (PyArray_NDIM(%(_b)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 1");
%(fail)s;
}
if (%(_data)s->nd != 1) {
if (PyArray_NDIM(%(_data)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(data) != 1");
%(fail)s;
}
if (%(_indices)s->nd != 1) {
if (PyArray_NDIM(%(_indices)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1");
%(fail)s;
}
if (%(_indptr)s->nd != 1) {
if (PyArray_NDIM(%(_indptr)s) != 1) {
PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1");
%(fail)s;
}
if( %(_indices)s->descr->type_num != PyArray_INT32) {
if( PyArray_DESCR(%(_indices)s)->type_num != NPY_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( %(_indptr)s->descr->type_num != PyArray_INT32)
if( PyArray_DESCR(%(_indptr)s)->type_num != NPY_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if (!%(_zout)s)
{
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1,
%(_indices)s->dimensions, %(_b)s->descr->type_num);
PyArray_DIMS(%(_indices)s), PyArray_DESCR(%(_b)s)->type_num);
}
if (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0])
if (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_indices)s)[0])
{
PyErr_SetString(PyExc_NotImplementedError,
"somehow _zout got the wrong size.. and I don't know how to resize it.");
......@@ -1348,9 +1349,9 @@ class StructuredAddSVCSR(gof.Op):
}
{ //makes it compile even though labels jump over variable definitions.
const npy_intp nnz = %(_indices)s->dimensions[0];
const npy_intp nnz = PyArray_DIMS(%(_indices)s)[0];
//TODO: error checking with this
const npy_intp N = %(_indptr)s->dimensions[0]-1;
const npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data;
const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
......@@ -1360,7 +1361,7 @@ class StructuredAddSVCSR(gof.Op):
dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data;
const npy_intp Sb = %(_b)s->strides[0] / %(_b)s->descr->elsize;
const npy_intp Sb = %(_b)s->strides[0] / PyArray_DESCR(%(_b)s)->elsize;
// loop over columns
for (npy_int32 j = 0; j < N; ++j)
......@@ -1537,70 +1538,70 @@ class SamplingDotCSR(gof.Op):
[]).dtype_specs()[-1]
rval = """
if (%(x)s->nd != 2) {
if (PyArray_NDIM(%(x)s) != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;}
if (%(y)s->nd != 2) {
if (PyArray_NDIM(%(y)s) != 2) {
PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
if (%(x)s->descr->type_num != %(typenum_x)s) {
if (PyArray_DESCR(%(x)s)->type_num != %(typenum_x)s) {
PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for x");
%(fail)s;}
if (%(y)s->descr->type_num != %(typenum_y)s) {
if (PyArray_DESCR(%(y)s)->type_num != %(typenum_y)s) {
PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for y");
%(fail)s;}
if (%(p_data)s->descr->type_num != %(typenum_p)s) {
if (PyArray_DESCR(%(p_data)s)->type_num != %(typenum_p)s) {
PyErr_SetString(PyExc_NotImplementedError,
"Invalid type for pattern");
%(fail)s;}
if (%(x)s->dimensions[1] != %(y)s->dimensions[1]) {
if (PyArray_DIMS(%(x)s)[1] != PyArray_DIMS(%(y)s)[1]) {
PyErr_SetString(PyExc_NotImplementedError,
"x's number of columns doesn't match y's rows! Note: sampling_dot is different from dot because y is assumed to be transposed.");
%(fail)s;}
if (%(y)s->dimensions[0] != ((npy_int32 *)%(p_ncols)s->data)[0] ||
%(x)s->dimensions[0] != (%(p_ptr)s->dimensions[0] - 1))
if (PyArray_DIMS(%(y)s)[0] != ((npy_int32 *)%(p_ncols)s->data)[0] ||
PyArray_DIMS(%(x)s)[0] != (PyArray_DIMS(%(p_ptr)s)[0] - 1))
{PyErr_SetString(PyExc_NotImplementedError,
"The dimension of the pattern and the output must match"); %(fail)s;}
// Allocate output
if (!%(z_data)s
|| (%(z_data)s->dimensions[0] != %(p_data)s->dimensions[0])
|| (%(z_data)s->descr->type_num != %(typenum_zd)s)) {
|| (PyArray_DIMS(%(z_data)s)[0] != PyArray_DIMS(%(p_data)s)[0])
|| (PyArray_DESCR(%(z_data)s)->type_num != %(typenum_zd)s)) {
{Py_XDECREF(%(z_data)s);}
npy_intp dims[] = {0};
dims[0] = %(p_data)s->dimensions[0];
dims[0] = PyArray_DIMS(%(p_data)s)[0];
%(z_data)s = (PyArrayObject*) PyArray_SimpleNew(1, dims,
%(typenum_zd)s);
}
if (!%(z_ind)s
|| (%(z_ind)s->dimensions[0] != %(p_ind)s->dimensions[0])
|| (%(z_ind)s->descr->type_num != %(typenum_zi)s)) {
|| (PyArray_DIMS(%(z_ind)s)[0] != PyArray_DIMS(%(p_ind)s)[0])
|| (PyArray_DESCR(%(z_ind)s)->type_num != %(typenum_zi)s)) {
{Py_XDECREF(%(z_ind)s);}
npy_intp dims[] = {0};
dims[0] = %(p_ind)s->dimensions[0];
dims[0] = PyArray_DIMS(%(p_ind)s)[0];
%(z_ind)s = (PyArrayObject*) PyArray_SimpleNew(1, dims,
%(typenum_zi)s);
}
if (!%(z_ptr)s
|| (%(z_ptr)s->dimensions[0] != %(p_ptr)s->dimensions[0])
|| (%(z_ptr)s->descr->type_num != %(typenum_zp)s)) {
|| (PyArray_DIMS(%(z_ptr)s)[0] != PyArray_DIMS(%(p_ptr)s)[0])
|| (PyArray_DESCR(%(z_ptr)s)->type_num != %(typenum_zp)s)) {
{Py_XDECREF(%(z_ptr)s);}
npy_intp dims[] = {0};
dims[0] = %(p_ptr)s->dimensions[0];
dims[0] = PyArray_DIMS(%(p_ptr)s)[0];
%(z_ptr)s = (PyArrayObject*) PyArray_SimpleNew(1, dims,
%(typenum_zp)s);
}
{
// Product of MxK and NxK, output MxN
npy_intp M = %(x)s->dimensions[0];
npy_intp N = %(y)s->dimensions[0];
npy_intp K = %(y)s->dimensions[1];
npy_intp M = PyArray_DIMS(%(x)s)[0];
npy_intp N = PyArray_DIMS(%(y)s)[0];
npy_intp K = PyArray_DIMS(%(y)s)[1];
// pointers to access actual data in the arrays passed as params.
const dtype_%(x)s* __restrict__ Dx = (dtype_%(x)s*)%(x)s->data;
......@@ -1612,17 +1613,17 @@ PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
dtype_%(z_ind)s* __restrict__ Dzi = (dtype_%(z_ind)s*)%(z_ind)s->data;
dtype_%(z_ptr)s* __restrict__ Dzp = (dtype_%(z_ptr)s*)%(z_ptr)s->data;
const npy_intp Sdx = %(x)s->strides[1]/%(x)s->descr->elsize;
const npy_intp Sdy = %(y)s->strides[1]/%(y)s->descr->elsize;
const npy_intp Sdpd = %(p_data)s->strides[0] / %(p_data)s->descr->elsize;
const npy_intp Sdpi = %(p_ind)s->strides[0] / %(p_ind)s->descr->elsize;
const npy_intp Sdpp = %(p_ptr)s->strides[0] / %(p_ptr)s->descr->elsize;
const npy_intp Sdzd = %(z_data)s->strides[0] / %(z_data)s->descr->elsize;
const npy_intp Sdzi = %(z_ind)s->strides[0] / %(z_ind)s->descr->elsize;
const npy_intp Sdzp = %(z_ptr)s->strides[0] / %(z_ptr)s->descr->elsize;
memcpy(Dzi, Dpi, %(p_ind)s->dimensions[0]*sizeof(dtype_%(p_ind)s));
memcpy(Dzp, Dpp, %(p_ptr)s->dimensions[0]*sizeof(dtype_%(p_ptr)s));
const npy_intp Sdx = %(x)s->strides[1]/PyArray_DESCR(%(x)s)->elsize;
const npy_intp Sdy = %(y)s->strides[1]/PyArray_DESCR(%(y)s)->elsize;
const npy_intp Sdpd = %(p_data)s->strides[0] / PyArray_DESCR(%(p_data)s)->elsize;
const npy_intp Sdpi = %(p_ind)s->strides[0] / PyArray_DESCR(%(p_ind)s)->elsize;
const npy_intp Sdpp = %(p_ptr)s->strides[0] / PyArray_DESCR(%(p_ptr)s)->elsize;
const npy_intp Sdzd = %(z_data)s->strides[0] / PyArray_DESCR(%(z_data)s)->elsize;
const npy_intp Sdzi = %(z_ind)s->strides[0] / PyArray_DESCR(%(z_ind)s)->elsize;
const npy_intp Sdzp = %(z_ptr)s->strides[0] / PyArray_DESCR(%(z_ptr)s)->elsize;
memcpy(Dzi, Dpi, PyArray_DIMS(%(p_ind)s)[0]*sizeof(dtype_%(p_ind)s));
memcpy(Dzp, Dpp, PyArray_DIMS(%(p_ptr)s)[0]*sizeof(dtype_%(p_ptr)s));
for (npy_int32 m = 0; m < M; ++m) {
for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
......
......@@ -1374,7 +1374,7 @@ def test_sparse_shared_memory():
theano.In(y, mutable=True)], z, mode='FAST_RUN')
def f_(x, y, m1=m1, m2=m2):
return numpy.dot(x * 3, m1) + numpy.dot(y * 2, m2)
return ((x * 3) * m1) + ((y * 2) * m2)
assert SparseType.may_share_memory(a, a) # This is trivial
result = f(a, a)
......
......@@ -1972,7 +1972,7 @@ class ScalarFromTensor(Op):
z, = outputs
fail = sub['fail']
return """
%(z)s = ((dtype_%(x)s*)(%(x)s->data))[0];
%(z)s = ((dtype_%(x)s*)(PyArray_DATA(%(x)s)))[0];
""" % locals()
def c_code_cache_version(self):
......@@ -3087,14 +3087,14 @@ class Alloc(gof.Op):
# Initialize shape
for i, shp_i in enumerate(inp[1:]):
code += """
shape[%(i)s] = ((dtype_%(shp_i)s*) %(shp_i)s->data)[0];
shape[%(i)s] = ((dtype_%(shp_i)s*) PyArray_DATA(%(shp_i)s))[0];
""" % dict(i=i, shp_i=shp_i)
code += """
int need_new_out = (NULL == %(zz)s);
for (int i = 0; i < %(ndim)s; i++)
need_new_out = (need_new_out
|| (%(zz)s->dimensions[i] != shape[i]));
|| (PyArray_DIMS(%(zz)s)[i] != shape[i]));
if (need_new_out)
{
......@@ -4042,14 +4042,14 @@ class Subtensor(Op):
//TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure
Py_INCREF(%(x)s->descr);
Py_INCREF(PyArray_DESCR(%(x)s));
PyArrayObject * xview = (PyArrayObject*)PyArray_NewFromDescr(
&PyArray_Type,
%(x)s->descr,
PyArray_DESCR(%(x)s),
%(view_ndim)s,
%(x)s->dimensions,
%(x)s->strides,
%(x)s->data,
PyArray_DIMS(%(x)s),
PyArray_STRIDES(%(x)s),
PyArray_DATA(%(x)s),
%(x)s->flags,
NULL);
if (!xview)
......@@ -4057,22 +4057,22 @@ class Subtensor(Op):
%(fail)s;
}
if ((xview->dimensions == %(x)s->dimensions)
&& (%(x)s->dimensions != NULL))
if ((PyArray_DIMS(xview) == PyArray_DIMS(%(x)s))
&& (PyArray_DIMS(%(x)s) != NULL))
{
PyErr_Format(PyExc_ValueError, "x and xview"
"(with %%d dims) have the same dimensions"
" pointers: %%p and %%p",
%(x)s->nd, xview->dimensions, %(x)s->dimensions);
PyArray_NDIM(%(x)s), PyArray_DIMS(xview), PyArray_DIMS(%(x)s));
%(fail)s;
}
if (xview->strides == %(x)s->strides
&& (%(x)s->dimensions != NULL))
if (PyArray_STRIDES(xview) == PyArray_STRIDES(%(x)s)
&& (PyArray_DIMS(%(x)s) != NULL))
{
PyErr_Format(PyExc_ValueError, "x and xview"
"(with %%d dims) have the same strides"
" pointers: %%p and %%p",
%(x)s->nd, xview->strides, %(x)s->strides);
PyArray_NDIM(%(x)s), PyArray_STRIDES(xview), PyArray_STRIDES(%(x)s));
%(fail)s;
}
......@@ -4080,7 +4080,7 @@ class Subtensor(Op):
{
if (is_slice[outer_ii])
{
npy_intp length = %(x)s->dimensions[outer_ii];
npy_intp length = PyArray_DIMS(%(x)s)[outer_ii];
npy_intp slicelength;
npy_intp start = subtensor_spec[spec_pos+0];
npy_intp stop = subtensor_spec[spec_pos+1];
......@@ -4144,9 +4144,9 @@ class Subtensor(Op):
}
assert (slicelength <= length);
xview->data += %(x)s->strides[outer_ii] * start;
xview->dimensions[inner_ii] = slicelength;
xview->strides[inner_ii] = %(x)s->strides[outer_ii] * step;
xview->data += PyArray_STRIDES(%(x)s)[outer_ii] * start;
PyArray_DIMS(xview)[inner_ii] = slicelength;
PyArray_STRIDES(xview)[inner_ii] = PyArray_STRIDES(%(x)s)[outer_ii] * step;
inner_ii += 1;
spec_pos += 3;
......@@ -4154,12 +4154,12 @@ class Subtensor(Op):
else // tuple coord `outer_ii` is an int
{
int idx = subtensor_spec[spec_pos];
if (idx < 0) idx += %(x)s->dimensions[outer_ii];
if (idx < 0) idx += PyArray_DIMS(%(x)s)[outer_ii];
if (idx >= 0)
{
if (idx < %(x)s->dimensions[outer_ii])
if (idx < PyArray_DIMS(%(x)s)[outer_ii])
{
xview->data += %(x)s->strides[outer_ii] * idx;
xview->data += PyArray_STRIDES(%(x)s)[outer_ii] * idx;
}
else
{
......@@ -4176,16 +4176,16 @@ class Subtensor(Op):
spec_pos += 1;
}
}
assert (inner_ii <= xview->nd);
while (inner_ii < xview->nd)
assert (inner_ii <= PyArray_NDIM(xview));
while (inner_ii < PyArray_NDIM(xview))
{
assert (outer_ii < %(x)s->nd);
xview->dimensions[inner_ii] = %(x)s->dimensions[outer_ii];
xview->strides[inner_ii] = %(x)s->strides[outer_ii];
assert (outer_ii < PyArray_NDIM(%(x)s));
PyArray_DIMS(xview)[inner_ii] = PyArray_DIMS(%(x)s)[outer_ii];
PyArray_STRIDES(xview)[inner_ii] = PyArray_STRIDES(%(x)s)[outer_ii];
inner_ii += 1;
outer_ii += 1;
}
PyArray_UpdateFlags(xview, NPY_C_CONTIGUOUS|NPY_F_CONTIGUOUS);
PyArray_UpdateFlags(xview, NPY_ARRAY_C_CONTIGUOUS|NPY_F_CONTIGUOUS);
""" % locals()
# print rval
return rval
......@@ -4203,7 +4203,7 @@ class Subtensor(Op):
part1 = """
if (%(z)s) Py_DECREF(%(z)s);
Py_INCREF(py_%(x)s);
xview->base = py_%(x)s;
PyArray_BASE(xview) = py_%(x)s;
assert(py_%(x)s == (PyObject*)%(x)s);
%(z)s = xview;
""" % locals()
......@@ -4504,7 +4504,7 @@ class IncSubtensor(Op):
{
if (%(z)s) Py_DECREF(%(z)s);
%(z)s = (PyArrayObject*)PyArray_FromAny(py_%(x)s, NULL, 0, 0,
NPY_ENSURECOPY, NULL);
NPY_ARRAY_ENSURECOPY, NULL);
}
""" % locals()
......@@ -4529,7 +4529,7 @@ class IncSubtensor(Op):
if (add_rval)
{
assert (PyArray_Check((PyObject*)add_rval));
assert (add_rval->data == xview->data);
assert (PyArray_DATA(add_rval) == PyArray_DATA(xview));
Py_DECREF(add_rval);
}
else
......@@ -5373,7 +5373,7 @@ class Reshape(Op):
new_ndim = self.ndim
fail = sub['fail']
return """
assert (%(shp)s->nd == 1);
assert (PyArray_NDIM(%(shp)s) == 1);
npy_intp new_dims[%(new_ndim)s];
PyArray_Dims newshape;
newshape.ptr = new_dims;
......@@ -5385,7 +5385,7 @@ class Reshape(Op):
// -- will err if this will downcast. This could happen if the
// -- user passes an int64 dtype, but npy_intp ends up being int32.
new_dims[ii] = ((dtype_%(shp)s*)(
%(shp)s->data + ii * %(shp)s->strides[0]))[0];
PyArray_DATA(%(shp)s) + ii * PyArray_STRIDES(%(shp)s)[0]))[0];
}
Py_XDECREF(%(z)s);
%(z)s = (PyArrayObject *) PyArray_Newshape(%(x)s, &newshape,
......
......@@ -493,16 +493,16 @@ class GemmRelated(Op):
declare_NS = """
int unit = 0;
int type_num = %(_x)s->descr->type_num;
int type_size = %(_x)s->descr->elsize; // in bytes
int type_num = PyArray_DESCR(%(_x)s)->type_num;
int type_size = PyArray_DESCR(%(_x)s)->elsize; // in bytes
npy_intp* Nx = %(_x)s->dimensions;
npy_intp* Ny = %(_y)s->dimensions;
npy_intp* Nz = 0; //%(_zout)s->dimensions;
npy_intp* Nx = PyArray_DIMS(%(_x)s);
npy_intp* Ny = PyArray_DIMS(%(_y)s);
npy_intp* Nz = 0; //PyArray_DIMS(%(_zout)s);
npy_intp* Sx = %(_x)s->strides;
npy_intp* Sy = %(_y)s->strides;
npy_intp* Sz = 0; //%(_zout)s->strides;
npy_intp* Sx = PyArray_STRIDES(%(_x)s);
npy_intp* Sy = PyArray_STRIDES(%(_y)s);
npy_intp* Sz = 0; //PyArray_STRIDES(%(_zout)s);
//strides for x, y, z in dimensions 0, 1
int sx_0, sx_1, sy_0, sy_1, sz_0, sz_1;
......@@ -511,39 +511,49 @@ class GemmRelated(Op):
#setup_z_Nz_Sz = None
# C source template: rank validation for the x, y and z operands.
# The %(name)s placeholders are presumably filled in by Python %-formatting
# (note the escaped `%%d`, which leaves a literal `%d` for PyErr_Format).
# Each check sets NotImplementedError, reporting the actual rank, and then
# expands %(fail)s.  z is only checked when %(_zout)s is non-NULL.
# NOTE(review): every check appears twice, once with direct `->nd` field
# access and once with PyArray_NDIM(); this looks like the before/after lines
# of a change kept side by side -- confirm whether both forms should remain.
check_xyz_rank2 = """
if (%(_x)s->nd != 2) {
PyErr_Format(PyExc_NotImplementedError, "rank(x) != 2. rank(x) is %%d.", %(_x)s->nd); %(fail)s;}
if (%(_y)s->nd != 2) {
PyErr_Format(PyExc_NotImplementedError, "rank(y) != 2. rank(y) is %%d.", %(_y)s->nd); %(fail)s;}
if (%(_zout)s && %(_zout)s->nd != 2) {
PyErr_Format(PyExc_NotImplementedError, "rank(z) != 2. rank(z) is %%d.", %(_zout)s->nd); %(fail)s;}
if (PyArray_NDIM(%(_x)s) != 2) {
PyErr_Format(PyExc_NotImplementedError,
"rank(x) != 2. rank(x) is %%d.",
PyArray_NDIM(%(_x)s));
%(fail)s;
}
if (PyArray_NDIM(%(_y)s) != 2) {
PyErr_Format(PyExc_NotImplementedError,
"rank(y) != 2. rank(y) is %%d.", PyArray_NDIM(%(_y)s));
%(fail)s;
}
if (%(_zout)s && PyArray_NDIM(%(_zout)s) != 2) {
PyErr_Format(PyExc_NotImplementedError,
"rank(z) != 2. rank(z) is %%d.", PyArray_NDIM(%(_zout)s));
%(fail)s;
}
"""
# C source template: dtype validation for x, y and z.
# Each operand's type_num must be the double or the float type code, and all
# three type_nums must be equal; otherwise NotImplementedError is set and
# %(fail)s is expanded.
# NOTE(review): %(_zout)s is dereferenced here without a NULL test --
# presumably this must run after z has been set up; confirm the ordering.
# NOTE(review): each condition appears in two spellings (`->descr->type_num`
# with PyArray_DOUBLE/PyArray_FLOAT, and PyArray_DESCR() with
# NPY_DOUBLE/NPY_FLOAT), so the generated C has back-to-back `if`s guarding a
# single error block.  This looks like before/after lines of a change kept
# side by side -- confirm which form is intended.
check_xyz_double_or_float = """
if ((%(_x)s->descr->type_num != PyArray_DOUBLE)
&& (%(_x)s->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(%(_x)s)->type_num != NPY_DOUBLE)
&& (PyArray_DESCR(%(_x)s)->type_num != NPY_FLOAT))
{PyErr_SetString(PyExc_NotImplementedError, "type(x) is not double or float"); %(fail)s;}
if ((%(_y)s->descr->type_num != PyArray_DOUBLE)
&& (%(_y)s->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(%(_y)s)->type_num != NPY_DOUBLE)
&& (PyArray_DESCR(%(_y)s)->type_num != NPY_FLOAT))
{PyErr_SetString(PyExc_NotImplementedError, "type(y) is not double or float"); %(fail)s;}
if ((%(_zout)s->descr->type_num != PyArray_DOUBLE)
&& (%(_zout)s->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(%(_zout)s)->type_num != NPY_DOUBLE)
&& (PyArray_DESCR(%(_zout)s)->type_num != NPY_FLOAT))
{PyErr_SetString(PyExc_NotImplementedError, "type(z) is not double or float"); %(fail)s;}
if ((%(_x)s->descr->type_num != %(_y)s->descr->type_num)
||(%(_x)s->descr->type_num != %(_zout)s->descr->type_num))
if ((PyArray_DESCR(%(_x)s)->type_num != PyArray_DESCR(%(_y)s)->type_num)
||(PyArray_DESCR(%(_x)s)->type_num != PyArray_DESCR(%(_zout)s)->type_num))
{ PyErr_SetString(PyExc_NotImplementedError, "type(x), type(y), type(z) are not all the same"); %(fail)s; }
"""
# C source template: dtype validation for the a and b operands.
# a and b must each be double or float, but they are not required to have the
# same type as x, y, z (no cross-operand comparison is made here).
# On failure NotImplementedError is set and %(fail)s is expanded.
# NOTE(review): each condition appears in two spellings (`->descr->type_num`
# and PyArray_DESCR()), which looks like before/after lines of a change kept
# side by side -- confirm which form is intended.
check_ab_double_or_float = """
if ((%(_a)s->descr->type_num != PyArray_DOUBLE)
&& (%(_a)s->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(%(_a)s)->type_num != NPY_DOUBLE)
&& (PyArray_DESCR(%(_a)s)->type_num != NPY_FLOAT))
{PyErr_SetString(PyExc_NotImplementedError, "type(a) is not double or float"); %(fail)s;}
if ((%(_b)s->descr->type_num != PyArray_DOUBLE)
&& (%(_b)s->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(%(_b)s)->type_num != NPY_DOUBLE)
&& (PyArray_DESCR(%(_b)s)->type_num != NPY_FLOAT))
{PyErr_SetString(PyExc_NotImplementedError, "type(b) is not double or float"); %(fail)s;}
"""
......@@ -587,7 +597,7 @@ class GemmRelated(Op):
%(fail)s
Py_XDECREF(%(_x)s);
%(_x)s = _x_copy;
Sx = %(_x)s->strides;
Sx = PyArray_STRIDES(%(_x)s);
}
if ((Sy[0] < 1) || (Sy[1] < 1) || (Sy[0] MOD type_size) || (Sy[1] MOD type_size)
......@@ -598,7 +608,7 @@ class GemmRelated(Op):
%(fail)s
Py_XDECREF(%(_y)s);
%(_y)s = _y_copy;
Sy = %(_y)s->strides;
Sy = PyArray_STRIDES(%(_y)s);
}
if ((Sz[0] < 1) || (Sz[1] < 1) || (Sz[0] MOD type_size) || (Sz[1] MOD type_size)
......@@ -609,7 +619,7 @@ class GemmRelated(Op):
%(fail)s
Py_XDECREF(%(_zout)s);
%(_zout)s = _z_copy;
Sz = %(_zout)s->strides;
Sz = PyArray_STRIDES(%(_zout)s);
}
"""
......@@ -644,7 +654,7 @@ class GemmRelated(Op):
"""
# C template fragment that opens the single-precision branch of a `switch`:
# a case label followed by an opening brace that is not closed in this string.
# NOTE(review): two labels are present (PyArray_FLOAT and NPY_FLOAT); if both
# names denote the same constant the generated switch would contain a
# duplicate case label -- confirm that only one is meant to remain.
case_float = """
case PyArray_FLOAT:
case NPY_FLOAT:
{
"""
......@@ -677,7 +687,7 @@ class GemmRelated(Op):
# C template fragment that first closes a previously opened brace block with
# `}` and `break;`, then opens the double-precision branch of a `switch`
# (case label plus an opening brace that is not closed in this string).
# NOTE(review): two labels are present (PyArray_DOUBLE and NPY_DOUBLE); if
# both names denote the same constant the generated switch would contain a
# duplicate case label -- confirm that only one is meant to remain.
case_double = """
}
break;
case PyArray_DOUBLE:
case NPY_DOUBLE:
{
"""
......@@ -878,25 +888,25 @@ class Gemm(GemmRelated):
%(_zout)s = %(_z)s;
Py_INCREF(%(_zout)s);
}
Nz = %(_z)s->dimensions;
Sz = %(_z)s->strides;
Nz = PyArray_DIMS(%(_z)s);
Sz = PyArray_STRIDES(%(_z)s);
"""
setup_z_Nz_Sz_outplace = """
if ((NULL == %(_zout)s)
|| (%(_zout)s->dimensions[0] != %(_z)s->dimensions[0])
|| (%(_zout)s->dimensions[1] != %(_z)s->dimensions[1])
|| (%(_zout)s->strides[0] <= 0)
|| (%(_zout)s->strides[1] <= 0)
|| (%(_zout)s->strides[0] MOD type_size)
|| (%(_zout)s->strides[1] MOD type_size)
|| ((%(_zout)s->strides[0] != type_size)
&& (%(_zout)s->strides[1] != type_size)))
|| (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_z)s)[0])
|| (PyArray_DIMS(%(_zout)s)[1] != PyArray_DIMS(%(_z)s)[1])
|| (PyArray_STRIDES(%(_zout)s)[0] <= 0)
|| (PyArray_STRIDES(%(_zout)s)[1] <= 0)
|| (PyArray_STRIDES(%(_zout)s)[0] MOD type_size)
|| (PyArray_STRIDES(%(_zout)s)[1] MOD type_size)
|| ((PyArray_STRIDES(%(_zout)s)[0] != type_size)
&& (PyArray_STRIDES(%(_zout)s)[1] != type_size)))
{
Py_XDECREF(%(_zout)s);
npy_intp dims[2];
dims[0] = %(_z)s->dimensions[0];
dims[1] = %(_z)s->dimensions[1];
dims[0] = PyArray_DIMS(%(_z)s)[0];
dims[1] = PyArray_DIMS(%(_z)s)[1];
%(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims,
type_num_%(_z)s);
//fprintf(stderr, "Gemm Allocating %%i %%i\\n", dims[0], dims[1]);
......@@ -906,17 +916,17 @@ class Gemm(GemmRelated):
%(fail)s
}
}
Nz = %(_zout)s->dimensions;
Sz = %(_zout)s->strides;
Nz = PyArray_DIMS(%(_zout)s);
Sz = PyArray_STRIDES(%(_zout)s);
if (%(_zout)s->descr->type_num == PyArray_FLOAT)
if (PyArray_DESCR(%(_zout)s)->type_num == NPY_FLOAT)
{
float * zoutdata = (float*)%(_zout)s->data;
float * zoutdata = (float*)PyArray_DATA(%(_zout)s);
int zoi = Sz[0] / sizeof(float);
int zoj = Sz[1] / sizeof(float);
const float * zdata = (float*)%(_z)s->data;
int zi = %(_z)s->strides[0]/sizeof(float);
int zj = %(_z)s->strides[1]/sizeof(float);
const float * zdata = (float*)PyArray_DATA(%(_z)s);
int zi = PyArray_STRIDES(%(_z)s)[0]/sizeof(float);
int zj = PyArray_STRIDES(%(_z)s)[1]/sizeof(float);
for (int i = 0; i < Nz[0]; ++i)
{
for (int j = 0; j < Nz[1]; ++j)
......@@ -925,14 +935,14 @@ class Gemm(GemmRelated):
}
}
}
else if (%(_zout)s->descr->type_num == PyArray_DOUBLE)
else if (PyArray_DESCR(%(_zout)s)->type_num == NPY_DOUBLE)
{
double * zoutdata = (double*) %(_zout)s->data;
double * zoutdata = (double*) PyArray_DATA(%(_zout)s);
int zoi = Sz[0] / sizeof(double);
int zoj = Sz[1] / sizeof(double);
const double * zdata = (double*)%(_z)s->data;
int zi = %(_z)s->strides[0]/sizeof(double);
int zj = %(_z)s->strides[1]/sizeof(double);
const double * zdata = (double*)PyArray_DATA(%(_z)s);
int zi = PyArray_STRIDES(%(_z)s)[0]/sizeof(double);
int zj = PyArray_STRIDES(%(_z)s)[1]/sizeof(double);
for (int i = 0; i < Nz[0]; ++i)
{
for (int j = 0; j < Nz[1]; ++j)
......@@ -951,22 +961,22 @@ class Gemm(GemmRelated):
# C template: load the scalars `a` and `b` as single-precision values.
# Element 0 of each operand's data buffer is read as float when its type_num
# is the float code and as double otherwise, then cast to REAL.  REAL is
# #define'd to float for the duration of this fragment and #undef'd at the end.
# NOTE(review): `a` and `b` are each declared twice (field-access form and
# PyArray_DESCR()/PyArray_DATA() form); in one C scope that is a
# redefinition.  Looks like before/after lines of a change kept side by
# side -- confirm which declarations should remain.
case_float_ab_constants = """
#define REAL float
float a = (%(_a)s->descr->type_num == PyArray_FLOAT)
? (REAL)(((float*)%(_a)s->data)[0])
: (REAL)(((double*)%(_a)s->data)[0]);
float b = (%(_b)s->descr->type_num == PyArray_FLOAT) ?
(REAL)(((float*)%(_b)s->data)[0])
: (REAL)(((double*)%(_b)s->data)[0]);
float a = (PyArray_DESCR(%(_a)s)->type_num == NPY_FLOAT)
? (REAL)(((float*)PyArray_DATA(%(_a)s))[0])
: (REAL)(((double*)PyArray_DATA(%(_a)s))[0]);
float b = (PyArray_DESCR(%(_b)s)->type_num == NPY_FLOAT) ?
(REAL)(((float*)PyArray_DATA(%(_b)s))[0])
: (REAL)(((double*)PyArray_DATA(%(_b)s))[0]);
#undef REAL
"""
# C template: load the scalars `a` and `b` as double-precision values.
# Element 0 of each operand's data buffer is read as float when its type_num
# is the float code and as double otherwise, then cast to REAL.  REAL is
# #define'd to double for the duration of this fragment and #undef'd at the
# end.
# NOTE(review): `a` and `b` are each declared twice (field-access form and
# PyArray_DESCR()/PyArray_DATA() form); in one C scope that is a
# redefinition.  Looks like before/after lines of a change kept side by
# side -- confirm which declarations should remain.
case_double_ab_constants = """
#define REAL double
double a = (%(_a)s->descr->type_num == PyArray_FLOAT)
? (REAL)(((float*)%(_a)s->data)[0])
: (REAL)(((double*)%(_a)s->data)[0]);
double b = (%(_b)s->descr->type_num == PyArray_FLOAT) ?
(REAL)(((float*)%(_b)s->data)[0])
: (REAL)(((double*)%(_b)s->data)[0]);
double a = (PyArray_DESCR(%(_a)s)->type_num == NPY_FLOAT)
? (REAL)(((float*)PyArray_DATA(%(_a)s))[0])
: (REAL)(((double*)PyArray_DATA(%(_a)s))[0]);
double b = (PyArray_DESCR(%(_b)s)->type_num == NPY_FLOAT) ?
(REAL)(((float*)PyArray_DATA(%(_b)s))[0])
: (REAL)(((double*)PyArray_DATA(%(_b)s))[0]);
#undef REAL
"""
......@@ -1469,13 +1479,13 @@ class Dot22(GemmRelated):
setup_z_Nz_Sz = """
if ((NULL == %(_zout)s)
|| (%(_zout)s->dimensions[0] != %(_x)s->dimensions[0])
|| (%(_zout)s->dimensions[1] != %(_y)s->dimensions[1]))
|| (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_x)s)[0])
|| (PyArray_DIMS(%(_zout)s)[1] != PyArray_DIMS(%(_y)s)[1]))
{
if (NULL != %(_zout)s) Py_XDECREF(%(_zout)s);
npy_intp dims[2];
dims[0] = %(_x)s->dimensions[0];
dims[1] = %(_y)s->dimensions[1];
dims[0] = PyArray_DIMS(%(_x)s)[0];
dims[1] = PyArray_DIMS(%(_y)s)[1];
%(_zout)s = (PyArrayObject*)PyArray_SimpleNew(2, dims,
type_num_%(_x)s);
//fprintf(stderr, "Dot Allocating %%i %%i\\n", dims[0], dims[1]);
......@@ -1485,8 +1495,8 @@ class Dot22(GemmRelated):
%(fail)s
}
}
Nz = %(_zout)s->dimensions;
Sz = %(_zout)s->strides;
Nz = PyArray_DIMS(%(_zout)s);
Sz = PyArray_STRIDES(%(_zout)s);
"""
check_ab_double_or_float = ""
......@@ -1743,26 +1753,26 @@ class Dot22Scalar(GemmRelated):
# Reuse the z-setup template defined on Dot22 unchanged.
setup_z_Nz_Sz = Dot22.setup_z_Nz_Sz
# C source template: only the scalar `a` is validated here (it must be double
# or float); there is no check for `b` in this template.  On failure
# NotImplementedError is set and %(fail)s is expanded.
# NOTE(review): the condition appears in two spellings (`->descr->type_num`
# and PyArray_DESCR()), producing two back-to-back `if`s before the error
# block.  Looks like before/after lines of a change kept side by side --
# confirm which form is intended.
check_ab_double_or_float = """
if ((%(_a)s->descr->type_num != PyArray_DOUBLE)
&& (%(_a)s->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(%(_a)s)->type_num != NPY_DOUBLE)
&& (PyArray_DESCR(%(_a)s)->type_num != NPY_FLOAT))
{PyErr_SetString(PyExc_NotImplementedError,
"type(a) is not double or float"); %(fail)s;}
"""
# C template: load the scalar `a` as a single-precision value and fix `b` at
# 0.0.  Element 0 of a's data buffer is read as float when its type_num is the
# float code and as double otherwise, then cast to REAL (locally #define'd to
# float and #undef'd before `b` is declared).
# NOTE(review): `a` is declared twice (field-access form and
# PyArray_DESCR()/PyArray_DATA() form); in one C scope that is a
# redefinition.  Looks like before/after lines of a change kept side by
# side -- confirm which declaration should remain.
case_float_ab_constants = """
#define REAL float
float a = (%(_a)s->descr->type_num == PyArray_FLOAT)
? (REAL)(((float*)%(_a)s->data)[0])
: (REAL)(((double*)%(_a)s->data)[0]);
float a = (PyArray_DESCR(%(_a)s)->type_num == NPY_FLOAT)
? (REAL)(((float*)PyArray_DATA(%(_a)s))[0])
: (REAL)(((double*)PyArray_DATA(%(_a)s))[0]);
#undef REAL
float b = 0.0;
"""
# C template: load the scalar `a` as a double-precision value and fix `b` at
# 0.0.  Element 0 of a's data buffer is read as float when its type_num is the
# float code and as double otherwise, then cast to REAL (locally #define'd to
# double and #undef'd before `b` is declared).
# NOTE(review): `a` is declared twice (field-access form and
# PyArray_DESCR()/PyArray_DATA() form); in one C scope that is a
# redefinition.  Looks like before/after lines of a change kept side by
# side -- confirm which declaration should remain.
case_double_ab_constants = """
#define REAL double
double a = (%(_a)s->descr->type_num == PyArray_FLOAT)
? (REAL)(((float*)%(_a)s->data)[0])
: (REAL)(((double*)%(_a)s->data)[0]);
double a = (PyArray_DESCR(%(_a)s)->type_num == NPY_FLOAT)
? (REAL)(((float*)PyArray_DATA(%(_a)s))[0])
: (REAL)(((double*)PyArray_DATA(%(_a)s))[0]);
#undef REAL
double b = 0.0;
"""
......
......@@ -33,35 +33,35 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
int elemsize ;
if (%(A)s->nd != 2)
if (PyArray_NDIM(%(A)s) != 2)
{PyErr_SetString(PyExc_NotImplementedError, "rank(A) != 2"); %(fail)s;}
if (%(x)s->nd != 1)
if (PyArray_NDIM(%(x)s) != 1)
{PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 1"); %(fail)s;}
if (%(y)s->nd != 1)
if (PyArray_NDIM(%(y)s) != 1)
{PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 1"); %(fail)s;}
if (%(a)s->nd != 0)
if (PyArray_NDIM(%(a)s) != 0)
{PyErr_SetString(PyExc_NotImplementedError, "rank(a) != 0"); %(fail)s;}
if (%(A)s->descr->type_num != %(x)s->descr->type_num)
if (PyArray_DESCR(%(A)s)->type_num != PyArray_DESCR(%(x)s)->type_num)
{ PyErr_SetString(PyExc_TypeError, "A vs. x"); %(fail)s; }
if (%(A)s->descr->type_num != %(y)s->descr->type_num)
if (PyArray_DESCR(%(A)s)->type_num != PyArray_DESCR(%(y)s)->type_num)
{ PyErr_SetString(PyExc_TypeError, "A vs. y"); %(fail)s; }
if (%(A)s->dimensions[0] != %(x)s->dimensions[0])
if (PyArray_DIMS(%(A)s)[0] != PyArray_DIMS(%(x)s)[0])
{
PyErr_SetString(PyExc_ValueError,
"Shape mismatch: A.shape[0] != x.shape[0]");
%(fail)s;
}
if (%(A)s->dimensions[1] != %(y)s->dimensions[0])
if (PyArray_DIMS(%(A)s)[1] != PyArray_DIMS(%(y)s)[0])
{
PyErr_SetString(PyExc_ValueError,
"Shape mismatch: A.shape[1] != y.shape[0]");
%(fail)s;
}
if (%(A)s->descr->type_num == PyArray_DOUBLE) { elemsize = 8; }
else if (%(A)s->descr->type_num == PyArray_FLOAT) { elemsize = 4;}
if (PyArray_DESCR(%(A)s)->type_num == NPY_DOUBLE) { elemsize = 8; }
else if (PyArray_DESCR(%(A)s)->type_num == NPY_FLOAT) { elemsize = 4;}
else
{
PyErr_SetString(PyExc_NotImplementedError, "complex CGer");
......@@ -70,22 +70,22 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
// copy A if !self.destructive or A is fully strided
if (!%(destructive)s
|| (%(A)s->strides[0] < 0)
|| (%(A)s->strides[1] < 0)
|| ((%(A)s->strides[0] != elemsize)
&& (%(A)s->strides[1] != elemsize)))
|| (PyArray_STRIDES(%(A)s)[0] < 0)
|| (PyArray_STRIDES(%(A)s)[1] < 0)
|| ((PyArray_STRIDES(%(A)s)[0] != elemsize)
&& (PyArray_STRIDES(%(A)s)[1] != elemsize)))
{
npy_intp dims[2];
dims[0] = %(A)s->dimensions[0];
dims[1] = %(A)s->dimensions[1];
dims[0] = PyArray_DIMS(%(A)s)[0];
dims[1] = PyArray_DIMS(%(A)s)[1];
if ((NULL == %(Z)s)
|| (%(Z)s->dimensions[0] != %(A)s->dimensions[0])
|| (%(Z)s->dimensions[1] != %(A)s->dimensions[1])
|| (%(Z)s->strides[0] < 0)
|| (%(Z)s->strides[1] < 0)
|| ((%(Z)s->strides[0] != elemsize)
&& (%(Z)s->strides[1] != elemsize)))
|| (PyArray_DIMS(%(Z)s)[0] != PyArray_DIMS(%(A)s)[0])
|| (PyArray_DIMS(%(Z)s)[1] != PyArray_DIMS(%(A)s)[1])
|| (PyArray_STRIDES(%(Z)s)[0] < 0)
|| (PyArray_STRIDES(%(Z)s)[1] < 0)
|| ((PyArray_STRIDES(%(Z)s)[0] != elemsize)
&& (PyArray_STRIDES(%(Z)s)[1] != elemsize)))
{
if (%(Z)s) Py_XDECREF(%(Z)s);
%(Z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims,
......@@ -101,14 +101,14 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
PyErr_SetString(PyExc_AssertionError, "%(Z)s != %(A)s");
%(fail)s
}
if (%(Z)s->descr->type_num == PyArray_FLOAT)
if (PyArray_DESCR(%(Z)s)->type_num == NPY_FLOAT)
{
float * zoutdata = (float*)%(Z)s->data;
const float * zdata = (float*)%(A)s->data;
int Ai = %(A)s->strides[0]/sizeof(float);
int Aj = %(A)s->strides[1]/sizeof(float);
int Zi = %(Z)s->strides[0]/sizeof(float);
int Zj = %(Z)s->strides[1]/sizeof(float);
float * zoutdata = (float*)PyArray_DATA(%(Z)s);
const float * zdata = (float*)PyArray_DATA(%(A)s);
int Ai = PyArray_STRIDES(%(A)s)[0]/sizeof(float);
int Aj = PyArray_STRIDES(%(A)s)[1]/sizeof(float);
int Zi = PyArray_STRIDES(%(Z)s)[0]/sizeof(float);
int Zj = PyArray_STRIDES(%(Z)s)[1]/sizeof(float);
for (int i = 0; i < dims[0]; ++i)
{
for (int j = 0; j < dims[1]; ++j)
......@@ -117,14 +117,14 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
}
}
}
else if (%(Z)s->descr->type_num == PyArray_DOUBLE)
else if (PyArray_DESCR(%(Z)s)->type_num == NPY_DOUBLE)
{
double * zoutdata = (double*) %(Z)s->data;
const double * zdata = (double*)%(A)s->data;
int Ai = %(A)s->strides[0]/sizeof(double);
int Aj = %(A)s->strides[1]/sizeof(double);
int Zi = %(Z)s->strides[0]/sizeof(double);
int Zj = %(Z)s->strides[1]/sizeof(double);
double * zoutdata = (double*) PyArray_DATA(%(Z)s);
const double * zdata = (double*)PyArray_DATA(%(A)s);
int Ai = PyArray_STRIDES(%(A)s)[0]/sizeof(double);
int Aj = PyArray_STRIDES(%(A)s)[1]/sizeof(double);
int Zi = PyArray_STRIDES(%(Z)s)[0]/sizeof(double);
int Zj = PyArray_STRIDES(%(Z)s)[1]/sizeof(double);
for (int i = 0; i < dims[0]; ++i)
{
for (int j = 0; j < dims[1]; ++j)
......@@ -152,10 +152,10 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
}
{
int Nz0 = %(Z)s->dimensions[0];
int Nz1 = %(Z)s->dimensions[1];
int Sx = %(x)s->strides[0] / elemsize;
int Sy = %(y)s->strides[0] / elemsize;
int Nz0 = PyArray_DIMS(%(Z)s)[0];
int Nz1 = PyArray_DIMS(%(Z)s)[1];
int Sx = PyArray_STRIDES(%(x)s)[0] / elemsize;
int Sy = PyArray_STRIDES(%(y)s)[0] / elemsize;
/* create appropriate strides for Z, if it is a row or column matrix.
* In that case, the value of the stride does not really matter, but
......@@ -163,11 +163,11 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
* - they are not smaller than the number of elements in the array,
* - they are not 0.
*/
int Sz0 = (Nz0 > 1) ? (%(Z)s->strides[0] / elemsize) : (Nz1 + 1);
int Sz1 = (Nz1 > 1) ? (%(Z)s->strides[1] / elemsize) : (Nz0 + 1);
int Sz0 = (Nz0 > 1) ? (PyArray_STRIDES(%(Z)s)[0] / elemsize) : (Nz1 + 1);
int Sz1 = (Nz1 > 1) ? (PyArray_STRIDES(%(Z)s)[1] / elemsize) : (Nz0 + 1);
dtype_%(x)s* x_data = (dtype_%(x)s*) %(x)s->data;
dtype_%(y)s* y_data = (dtype_%(y)s*) %(y)s->data;
dtype_%(x)s* x_data = (dtype_%(x)s*) PyArray_DATA(%(x)s);
dtype_%(y)s* y_data = (dtype_%(y)s*) PyArray_DATA(%(y)s);
// gemv expects pointers to the beginning of memory arrays,
// but numpy provides a pointer to the first element,
// so when the stride is negative, we need to get the last one.
......@@ -176,24 +176,24 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
if (Sy < 0)
y_data += (Nz1 - 1) * Sy;
if (%(Z)s->strides[0] == elemsize)
if (PyArray_STRIDES(%(Z)s)[0] == elemsize)
{
if (%(Z)s->descr->type_num == PyArray_FLOAT)
if (PyArray_DESCR(%(Z)s)->type_num == NPY_FLOAT)
{
//fprintf(stderr, "A\\n");
float alpha = ((dtype_%(a)s*)%(a)s->data)[0];
float alpha = ((dtype_%(a)s*)PyArray_DATA(%(a)s))[0];
sger_(&Nz0, &Nz1, &alpha,
(float*)x_data, &Sx,
(float*)y_data, &Sy,
(float*)(%(Z)s->data), &Sz1);
(float*)(PyArray_DATA(%(Z)s)), &Sz1);
}
else if (%(Z)s->descr->type_num == PyArray_DOUBLE)
else if (PyArray_DESCR(%(Z)s)->type_num == NPY_DOUBLE)
{
double alpha = ((dtype_%(a)s*)%(a)s->data)[0];
double alpha = ((dtype_%(a)s*)PyArray_DATA(%(a)s))[0];
dger_(&Nz0, &Nz1, &alpha,
(double*)x_data, &Sx,
(double*)y_data, &Sy,
(double*)(%(Z)s->data), &Sz1);
(double*)(PyArray_DATA(%(Z)s)), &Sz1);
}
else {
PyErr_SetString(PyExc_NotImplementedError,
......@@ -201,26 +201,26 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
%(fail)s
}
}
else if (%(Z)s->strides[1] == elemsize)
else if (PyArray_STRIDES(%(Z)s)[1] == elemsize)
{
if (%(Z)s->descr->type_num == PyArray_FLOAT)
if (PyArray_DESCR(%(Z)s)->type_num == NPY_FLOAT)
{
//fprintf(stderr, "B %%i %%i %%i %%i\\n", Nz0, Nz1, Sz0, Sz1);
float alpha = ((dtype_%(a)s*)(%(a)s->data))[0];
float alpha = ((dtype_%(a)s*)(PyArray_DATA(%(a)s)))[0];
//fprintf(stderr, "alpha=%%f\\n", alpha);
//fprintf(stderr, "sx sy %%i %%i\\n", Sx, Sy);
sger_(&Nz1, &Nz0, &alpha,
(float*)y_data, &Sy,
(float*)x_data, &Sx,
(float*)(%(Z)s->data), &Sz0);
(float*)(PyArray_DATA(%(Z)s)), &Sz0);
}
else if (%(Z)s->descr->type_num == PyArray_DOUBLE)
else if (PyArray_DESCR(%(Z)s)->type_num == NPY_DOUBLE)
{
double alpha = ((dtype_%(a)s*)%(a)s->data)[0];
double alpha = ((dtype_%(a)s*)PyArray_DATA(%(a)s))[0];
dger_(&Nz1, &Nz0, &alpha,
(double*)y_data, &Sy,
(double*)x_data, &Sx,
(double*)(%(Z)s->data), &Sz0);
(double*)(PyArray_DATA(%(Z)s)), &Sz0);
}
else
{
......@@ -290,68 +290,68 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
float fbeta;
double dbeta;
if (%(aa)s->nd != 1)
if (PyArray_NDIM(%(aa)s) != 1)
{
PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(aa) != 1");
%(fail)s;
}
if (%(xx)s->nd != 2)
if (PyArray_NDIM(%(xx)s) != 2)
{
PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(xx) != 2");
%(fail)s;
}
if (%(yy)s->nd != 1)
if (PyArray_NDIM(%(yy)s) != 1)
{
PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(yy) != 1");
%(fail)s;
}
if (%(alpha)s->nd != 0)
if (PyArray_NDIM(%(alpha)s) != 0)
{
PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(alpha) != 0");
%(fail)s;
}
if (%(beta)s->nd != 0)
if (PyArray_NDIM(%(beta)s) != 0)
{
PyErr_SetString(PyExc_NotImplementedError, "Gemv: rank(beta) != 0");
%(fail)s;
}
if (%(aa)s->descr->type_num != %(xx)s->descr->type_num)
if (PyArray_DESCR(%(aa)s)->type_num != PyArray_DESCR(%(xx)s)->type_num)
{ PyErr_SetString(PyExc_TypeError, "Gemv: aa vs. xx"); %(fail)s; }
if (%(aa)s->descr->type_num != %(yy)s->descr->type_num)
if (PyArray_DESCR(%(aa)s)->type_num != PyArray_DESCR(%(yy)s)->type_num)
{ PyErr_SetString(PyExc_TypeError, "Gemv: aa vs. yy"); %(fail)s; }
if (%(xx)s->dimensions[0] != %(aa)s->dimensions[0])
if (PyArray_DIMS(%(xx)s)[0] != PyArray_DIMS(%(aa)s)[0])
{
PyErr_SetString(PyExc_ValueError,
"Shape mismatch: A.shape[0] != x.shape[0]");
%(fail)s;
}
if (%(xx)s->dimensions[1] != %(yy)s->dimensions[0])
if (PyArray_DIMS(%(xx)s)[1] != PyArray_DIMS(%(yy)s)[0])
{
PyErr_SetString(PyExc_ValueError,
"Shape mismatch: A.shape[1] != y.shape[0]");
%(fail)s;
}
if (%(aa)s->descr->type_num == PyArray_DOUBLE) { elemsize = 8; }
else if (%(aa)s->descr->type_num == PyArray_FLOAT) { elemsize = 4;}
if (PyArray_DESCR(%(aa)s)->type_num == NPY_DOUBLE) { elemsize = 8; }
else if (PyArray_DESCR(%(aa)s)->type_num == NPY_FLOAT) { elemsize = 4;}
else {
PyErr_SetString(PyExc_NotImplementedError, "complex Gemv");
%(fail)s;
}
fbeta = dbeta = ((dtype_%(beta)s*)%(beta)s->data)[0];
fbeta = dbeta = ((dtype_%(beta)s*)PyArray_DATA(%(beta)s))[0];
// copy aa if not destructive
if (!%(destructive)s)
{
if ((NULL == %(zz)s)
|| (%(zz)s->dimensions[0] != %(aa)s->dimensions[0]))
|| (PyArray_DIMS(%(zz)s)[0] != PyArray_DIMS(%(aa)s)[0]))
{
if (%(zz)s) Py_XDECREF(%(zz)s);
%(zz)s = (PyArrayObject*)PyArray_SimpleNew(1,
%(aa)s->dimensions, type_num_%(aa)s);
PyArray_DIMS(%(aa)s), type_num_%(aa)s);
if(!%(zz)s) {
PyErr_SetString(PyExc_MemoryError,
"failed to alloc gemv output");
......@@ -365,24 +365,24 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
}
if (dbeta != 0)
{
if (%(zz)s->descr->type_num == PyArray_FLOAT)
if (PyArray_DESCR(%(zz)s)->type_num == NPY_FLOAT)
{
float * zoutdata = (float*)%(zz)s->data;
const float * zdata = (float*)%(aa)s->data;
int Ai = %(aa)s->strides[0]/sizeof(float);
int Zi = %(zz)s->strides[0]/sizeof(float);
for (int i = 0; i < %(aa)s->dimensions[0]; ++i)
float * zoutdata = (float*)PyArray_DATA(%(zz)s);
const float * zdata = (float*)PyArray_DATA(%(aa)s);
int Ai = PyArray_STRIDES(%(aa)s)[0]/sizeof(float);
int Zi = PyArray_STRIDES(%(zz)s)[0]/sizeof(float);
for (int i = 0; i < PyArray_DIMS(%(aa)s)[0]; ++i)
{
zoutdata[Zi*i] = fbeta * zdata[Ai*i];
}
}
else if (%(xx)s->descr->type_num == PyArray_DOUBLE)
else if (PyArray_DESCR(%(xx)s)->type_num == NPY_DOUBLE)
{
double * zoutdata = (double*) %(zz)s->data;
const double * zdata = (double*)%(aa)s->data;
int Ai = %(aa)s->strides[0]/sizeof(double);
int Zi = %(zz)s->strides[0]/sizeof(double);
for (int i = 0; i < %(aa)s->dimensions[0]; ++i)
double * zoutdata = (double*) PyArray_DATA(%(zz)s);
const double * zdata = (double*)PyArray_DATA(%(aa)s);
int Ai = PyArray_STRIDES(%(aa)s)[0]/sizeof(double);
int Zi = PyArray_STRIDES(%(zz)s)[0]/sizeof(double);
for (int i = 0; i < PyArray_DIMS(%(aa)s)[0]; ++i)
{
zoutdata[Zi*i] = dbeta * zdata[Ai*i];
}
......@@ -409,20 +409,20 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
{
char TRANS = 'T';
char NOTRANS = 'N';
int Nx0 = %(xx)s->dimensions[0];
int Nx1 = %(xx)s->dimensions[1];
int Nx0 = PyArray_DIMS(%(xx)s)[0];
int Nx1 = PyArray_DIMS(%(xx)s)[1];
/* This formula is needed in the case where xx is actually a row or
* column matrix, because BLAS sometimes insists that the strides:
* - are not smaller than the number of elements in the array
* - are not 0.
*/
int Sx0 = (Nx0 > 1) ? (%(xx)s->strides[0] / elemsize) : (Nx1 + 1);
int Sx1 = (Nx1 > 1) ? (%(xx)s->strides[1] / elemsize) : (Nx0 + 1);
int Sz = %(zz)s->strides[0] / elemsize;
int Sy = %(yy)s->strides[0] / elemsize;
int Sx0 = (Nx0 > 1) ? (PyArray_STRIDES(%(xx)s)[0] / elemsize) : (Nx1 + 1);
int Sx1 = (Nx1 > 1) ? (PyArray_STRIDES(%(xx)s)[1] / elemsize) : (Nx0 + 1);
int Sz = PyArray_STRIDES(%(zz)s)[0] / elemsize;
int Sy = PyArray_STRIDES(%(yy)s)[0] / elemsize;
dtype_%(yy)s* yy_data = (dtype_%(yy)s*) %(yy)s->data;
dtype_%(zz)s* zz_data = (dtype_%(zz)s*) %(zz)s->data;
dtype_%(yy)s* yy_data = (dtype_%(yy)s*) PyArray_DATA(%(yy)s);
dtype_%(zz)s* zz_data = (dtype_%(zz)s*) PyArray_DATA(%(zz)s);
// gemv expects pointers to the beginning of memory arrays,
// but numpy provides a pointer to the first element,
// so when the stride is negative, we need to get the last one.
......@@ -439,10 +439,10 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
// gemv on reversed matrix and vectors
// - if the copy is too long, maybe call vector/vector dot on
// each row instead
if ((%(xx)s->strides[0] < 0)
|| (%(xx)s->strides[1] < 0)
|| ((%(xx)s->strides[0] != elemsize)
&& (%(xx)s->strides[1] != elemsize)))
if ((PyArray_STRIDES(%(xx)s)[0] < 0)
|| (PyArray_STRIDES(%(xx)s)[1] < 0)
|| ((PyArray_STRIDES(%(xx)s)[0] != elemsize)
&& (PyArray_STRIDES(%(xx)s)[1] != elemsize)))
{
npy_intp dims[2];
dims[0] = Nx0;
......@@ -454,29 +454,29 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
%(fail)s
Py_XDECREF(%(xx)s);
%(xx)s = xx_copy;
Sx0 = (Nx0 > 1) ? (%(xx)s->strides[0] / elemsize) : (Nx1 + 1);
Sx1 = (Nx1 > 1) ? (%(xx)s->strides[1] / elemsize) : (Nx0 + 1);
Sx0 = (Nx0 > 1) ? (PyArray_STRIDES(%(xx)s)[0] / elemsize) : (Nx1 + 1);
Sx1 = (Nx1 > 1) ? (PyArray_STRIDES(%(xx)s)[1] / elemsize) : (Nx0 + 1);
}
if (%(xx)s->strides[0] == elemsize)
if (PyArray_STRIDES(%(xx)s)[0] == elemsize)
{
if (%(xx)s->descr->type_num == PyArray_FLOAT)
if (PyArray_DESCR(%(xx)s)->type_num == NPY_FLOAT)
{
//fprintf(stderr, "A\\n");
float alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0];
float alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
sgemv_(&NOTRANS, &Nx0, &Nx1,
&alpha,
(float*)(%(xx)s->data), &Sx1,
(float*)(PyArray_DATA(%(xx)s)), &Sx1,
(float*)yy_data, &Sy,
&fbeta,
(float*)zz_data, &Sz);
}
else if (%(xx)s->descr->type_num == PyArray_DOUBLE)
else if (PyArray_DESCR(%(xx)s)->type_num == NPY_DOUBLE)
{
double alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0];
double alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
dgemv_(&NOTRANS, &Nx0, &Nx1,
&alpha,
(double*)(%(xx)s->data), &Sx1,
(double*)(PyArray_DATA(%(xx)s)), &Sx1,
(double*)yy_data, &Sy,
&dbeta,
(double*)zz_data, &Sz);
......@@ -488,28 +488,28 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
%(fail)s
}
}
else if (%(xx)s->strides[1] == elemsize)
else if (PyArray_STRIDES(%(xx)s)[1] == elemsize)
{
if (%(xx)s->descr->type_num == PyArray_FLOAT)
if (PyArray_DESCR(%(xx)s)->type_num == NPY_FLOAT)
{
//fprintf(stderr, "B %%i %%i %%i %%i\\n",
// Nz0, Nz1, Sz0, Sz1);
float alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0];
float alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
//fprintf(stderr, "alpha=%%f\\n", alpha);
//fprintf(stderr, "sx sy %%i %%i\\n", Sx, Sy);
sgemv_(&TRANS, &Nx1, &Nx0,
&alpha,
(float*)(%(xx)s->data), &Sx0,
(float*)(PyArray_DATA(%(xx)s)), &Sx0,
(float*)yy_data, &Sy,
&fbeta,
(float*)zz_data, &Sz);
}
else if (%(xx)s->descr->type_num == PyArray_DOUBLE)
else if (PyArray_DESCR(%(xx)s)->type_num == NPY_DOUBLE)
{
double alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0];
double alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
dgemv_(&TRANS, &Nx1, &Nx0,
&alpha,
(double*)(%(xx)s->data), &Sx0,
(double*)(PyArray_DATA(%(xx)s)), &Sx0,
(double*)yy_data, &Sy,
&dbeta,
(double*)zz_data, &Sz);
......@@ -534,7 +534,7 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
// the matrix has at least one dim of length 0
// so we do this loop, which either iterates over 0 elements
// or else it does the right thing for length-0 x.
dtype_%(zz)s * zptr = (dtype_%(zz)s*)(%(zz)s->data);
dtype_%(zz)s * zptr = (dtype_%(zz)s*)(PyArray_DATA(%(zz)s));
for (int i = 0; i < Nx0; ++i)
{
zptr[i * Sz] *= dbeta;
......
......@@ -794,41 +794,41 @@ def ____gemm_code(check_ab, a_init, b_init):
return """
const char * error_string = NULL;
int type_num = _x->descr->type_num;
int type_size = _x->descr->elsize; // in bytes
int type_num = PyArray_DESCR(_x)->type_num;
int type_size = PyArray_DESCR(_x)->elsize; // in bytes
npy_intp* Nx = _x->dimensions;
npy_intp* Ny = _y->dimensions;
npy_intp* Nz = _z->dimensions;
npy_intp* Nx = PyArray_DIMS(_x);
npy_intp* Ny = PyArray_DIMS(_y);
npy_intp* Nz = PyArray_DIMS(_z);
npy_intp* Sx = _x->strides;
npy_intp* Sy = _y->strides;
npy_intp* Sz = _z->strides;
npy_intp* Sx = PyArray_STRIDES(_x);
npy_intp* Sy = PyArray_STRIDES(_y);
npy_intp* Sz = PyArray_STRIDES(_z);
size_t sx_0, sx_1, sy_0, sy_1, sz_0, sz_1;
int unit = 0;
if (_x->nd != 2) goto _dot_execute_fallback;
if (_y->nd != 2) goto _dot_execute_fallback;
if (_z->nd != 2) goto _dot_execute_fallback;
if (PyArray_NDIM(_x) != 2) goto _dot_execute_fallback;
if (PyArray_NDIM(_y) != 2) goto _dot_execute_fallback;
if (PyArray_NDIM(_z) != 2) goto _dot_execute_fallback;
%(check_ab)s
if ((_x->descr->type_num != PyArray_DOUBLE)
&& (_x->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(_x)->type_num != NPY_DOUBLE)
&& (PyArray_DESCR(_x)->type_num != NPY_FLOAT))
goto _dot_execute_fallback;
if ((_y->descr->type_num != PyArray_DOUBLE)
&& (_y->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(_y)->type_num != NPY_DOUBLE)
&& (PyArray_DESCR(_y)->type_num != NPY_FLOAT))
goto _dot_execute_fallback;
if ((_y->descr->type_num != PyArray_DOUBLE)
&& (_y->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(_y)->type_num != NPY_DOUBLE)
&& (PyArray_DESCR(_y)->type_num != NPY_FLOAT))
goto _dot_execute_fallback;
if ((_x->descr->type_num != _y->descr->type_num)
||(_x->descr->type_num != _z->descr->type_num))
if ((PyArray_DESCR(_x)->type_num != PyArray_DESCR(_y)->type_num)
||(PyArray_DESCR(_x)->type_num != PyArray_DESCR(_z)->type_num))
goto _dot_execute_fallback;
......@@ -863,7 +863,7 @@ def ____gemm_code(check_ab, a_init, b_init):
switch (type_num)
{
case PyArray_FLOAT:
case NPY_FLOAT:
{
#define REAL float
float a = %(a_init)s;
......@@ -888,7 +888,7 @@ def ____gemm_code(check_ab, a_init, b_init):
#undef REAL
}
break;
case PyArray_DOUBLE:
case NPY_DOUBLE:
{
#define REAL double
double a = %(a_init)s;
......
......@@ -270,7 +270,7 @@ class DimShuffle(Op):
nd_in = len(self.input_broadcastable)
nd_out = len(self.new_order)
check_input_nd = [('if (%(input)s->nd != ' + str(nd_in) + ')'
check_input_nd = [('if (PyArray_NDIM(%(input)s) != ' + str(nd_in) + ')'
'{PyErr_SetString(PyExc_NotImplementedError, "input nd"); %(fail)s;}')]
clear_output = ['if (%(res)s) {Py_XDECREF(%(res)s);}']
......@@ -282,13 +282,13 @@ class DimShuffle(Op):
'{ PyArrayObject * %(basename)s = %(input)s', 'Py_INCREF((PyObject*)%(basename)s)']
else:
get_base = [('{ PyArrayObject * %(basename)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(input)s, NULL,'
'0, 0, NPY_ALIGNED|NPY_ENSURECOPY, NULL)')]
'0, 0, NPY_ARRAY_ALIGNED|NPY_ARRAY_ENSURECOPY, NULL)')]
shape_statements = ['npy_intp dimensions[%i]' % nd_out]
for i, o in enumerate(self.new_order):
if o != 'x':
shape_statements += [('dimensions[' + str(
i) + '] = %(basename)s->dimensions[' + str(o) + ']')]
i) + '] = PyArray_DIMS(%(basename)s)[' + str(o) + ']')]
else:
shape_statements += [('dimensions[' + str(i) + '] = 1')]
......@@ -298,7 +298,7 @@ class DimShuffle(Op):
for i, o in enumerate(self.new_order):
if o != 'x':
strides_statements += [('strides[' + str(i)
+ '] = %(basename)s->strides[' + str(o) + ']')]
+ '] = PyArray_STRIDES(%(basename)s)[' + str(o) + ']')]
else:
strides_statements += [('strides[' + str(i) + '] = 0')]
......@@ -311,7 +311,7 @@ class DimShuffle(Op):
str(nd_out) +
'-1] == 0) strides[' +
str(nd_out) +
'-1] = %(basename)s->descr->elsize'
'-1] = PyArray_DESCR(%(basename)s)->elsize'
)
for i in xrange(nd_out - 2, -1, -1):
strides_statements.append(
......@@ -326,14 +326,20 @@ class DimShuffle(Op):
('%(res)s = (PyArrayObject*)PyArray_New(&PyArray_Type, '
'' + str(nd_out) + ', dimensions, '
'PyArray_TYPE(%(basename)s), strides, '
'%(basename)s->data, PyArray_ITEMSIZE(%(basename)s), '
'PyArray_DATA(%(basename)s), PyArray_ITEMSIZE(%(basename)s), '
#borrow only the writable flag from the base
# the NPY_OWNDATA flag will default to 0.
'(NPY_WRITEABLE*PyArray_ISWRITEABLE(%(basename)s)), NULL)'),
'(NPY_ARRAY_WRITEABLE*PyArray_ISWRITEABLE(%(basename)s)), NULL)'),
#recalculate flags: CONTIGUOUS, FORTRAN, ALIGNED
'PyArray_UpdateFlags(%(res)s, NPY_UPDATE_ALL)',
'PyArray_UpdateFlags(%(res)s, NPY_ARRAY_UPDATE_ALL)',
#we are making a view in both inplace and non-inplace cases
'%(res)s->base = (PyObject*)%(basename)s',
"""
#if NPY_VERSION <= 0x01000009
PyArray_BASE(%(res)s) = (PyObject*)%(basename)s;
#else
PyArray_SetBaseObject(%(res)s, (PyObject*)%(basename)s);
#endif
"""
'}']
full_code = statements(check_input_nd
......@@ -1341,7 +1347,7 @@ class CAReduce(Op):
pattern_ = str(pattern)[1:-1]
decl += """int tosum[]={%(pattern_)s};""" % locals()
alloc += """
for(int i=0;i<%(iname)s->nd;i++){
for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
if(PyArray_DIMS(%(iname)s)[i]==0 && tosum[i]){
PyErr_Format(PyExc_ValueError,
"Input of CAReduce{%(scal_name)s} has zero-size on axis %%d",i);
......
......@@ -47,7 +47,7 @@ def make_checks(loop_orders, dtypes, sub):
# tensor is as expected.
min_nd = max(nonx) + 1
init += """
if (%(var)s->nd < %(min_nd)s) {
if (PyArray_NDIM(%(var)s) < %(min_nd)s) {
PyErr_SetString(PyExc_ValueError, "Not enough dimensions on input.");
%%(fail)s
}
......@@ -67,8 +67,8 @@ def make_checks(loop_orders, dtypes, sub):
# jump = stride - adjust
jump = "(%s) - (%s)" % ("%(var)s_stride%(index)s" % locals(), adjust)
init += """
%(var)s_n%(index)s = %(var)s->dimensions[%(index)s];
%(var)s_stride%(index)s = %(var)s->strides[%(index)s] / sizeof(%(dtype)s);
%(var)s_n%(index)s = PyArray_DIMS(%(var)s)[%(index)s];
%(var)s_stride%(index)s = PyArray_STRIDES(%(var)s)[%(index)s] / sizeof(%(dtype)s);
%(var)s_jump%(index)s_%(j)s = %(jump)s;
//printf("%(var)s_jump%(index)s_%(j)s is:");
//std::cout << %(var)s_jump%(index)s_%(j)s << std::endl;
......@@ -152,7 +152,7 @@ def make_alloc(loop_orders, dtype, sub):
PyArray_Dims new_dims;
new_dims.len = %(nd)s;
new_dims.ptr = dims;
PyObject* success = PyArray_Resize(%(olv)s, &new_dims, 0, PyArray_CORDER);
PyObject* success = PyArray_Resize(%(olv)s, &new_dims, 0, NPY_CORDER);
if (!success) {
// If we can't resize the ndarray we have we can allocate a new one.
PyErr_Clear();
......@@ -215,11 +215,11 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub):
for j, index in enumerate(loop_order):
if index != 'x':
preloops.setdefault(j, "")
preloops[j] += ("%%(lv%(i)s)s_iter = (%(dtype)s*)(%%(lv%(i)s)s->data);\n" % locals()) % sub
preloops[j] += ("%%(lv%(i)s)s_iter = (%(dtype)s*)(PyArray_DATA(%%(lv%(i)s)s));\n" % locals()) % sub
break
else: # all broadcastable
preloops.setdefault(0, "")
preloops[0] += ("%%(lv%(i)s)s_iter = (%(dtype)s*)(%%(lv%(i)s)s->data);\n" % locals()) % sub
preloops[0] += ("%%(lv%(i)s)s_iter = (%(dtype)s*)(PyArray_DATA(%%(lv%(i)s)s));\n" % locals()) % sub
if len(loop_tasks) == 1:
s = preloops.get(0, "")
......@@ -263,7 +263,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub):
for i, index in enumerate(init_loop_orders[olv_index]):
if index != 'x':
order_loops += """
%(ovar)s_loops_it->first = abs(%(ovar)s->strides[%(index)i]);
%(ovar)s_loops_it->first = abs(PyArray_STRIDES(%(ovar)s)[%(index)i]);
""" % locals()
else:
# Stride is 0 when dimension is broadcastable
......@@ -375,7 +375,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub):
declare_iter = ""
for i, dtype in enumerate(dtypes):
var = sub["lv%i" % i]
declare_iter += "%(var)s_iter = (%(dtype)s*)(%(var)s->data);\n" % locals()
declare_iter += "%(var)s_iter = (%(dtype)s*)(PyArray_DATA(%(var)s));\n" % locals()
loop = inner_task
for i in reversed(range(nnested)):
......
......@@ -161,7 +161,6 @@ class Conv3D(theano.Op):
def c_compile_args(self):
flags = ldflags(libs=False, flags=True)
flags.append('-Werror')
return flags
def c_lib_dirs(self):
......@@ -183,62 +182,62 @@ class Conv3D(theano.Op):
//printf("\t\t\t\tConv3D c code\\n");
//Check dimensionality of inputs
if (%(W)s->nd != 5)
if (PyArray_NDIM(%(W)s) != 5)
{
PyErr_Format(PyExc_ValueError, "Conv3D: W must be a 5 dimensional tensor");
%(fail)s
}
if (%(V)s->nd != 5)
if (PyArray_NDIM(%(V)s) != 5)
{
PyErr_Format(PyExc_ValueError, "Conv3D: V must be a 5 dimensional tensor");
%(fail)s
}
if (%(b)s->nd != 1)
if (PyArray_NDIM(%(b)s) != 1)
{
PyErr_Format(PyExc_ValueError,"Conv3D: b must be a vector.");
%(fail)s
}
if (%(d)s->nd != 1)
if (PyArray_NDIM(%(d)s) != 1)
{
PyErr_Format(PyExc_ValueError,"Conv3D: d must be a vector.");
%(fail)s
}
if (%(d)s->dimensions[0] != 3)
if (PyArray_DIMS(%(d)s)[0] != 3)
{
PyErr_Format(PyExc_ValueError,"Conv3D: 3 stride length arguments expected (row, col, time) but %%li were given", (long)%(d)s->dimensions[0]);
PyErr_Format(PyExc_ValueError,"Conv3D: 3 stride length arguments expected (row, col, time) but %%li were given", (long)PyArray_DIMS(%(d)s)[0]);
%(fail)s
}
//Read and check sizes of inputs
{ // exta scope so error handler jumps don't cause errors
const int batchSize = %(V)s->dimensions[0];
const int outputChannels = %(W)s->dimensions[0];
const int inputChannels = %(V)s->dimensions[4];
const int batchSize = PyArray_DIMS(%(V)s)[0];
const int outputChannels = PyArray_DIMS(%(W)s)[0];
const int inputChannels = PyArray_DIMS(%(V)s)[4];
if (%(W)s->dimensions[4] != inputChannels)
if (PyArray_DIMS(%(W)s)[4] != inputChannels)
{
PyErr_Format(PyExc_ValueError, "Conv3D: W operates on a %%ld channel image but the image has %%d channels. Overall shape of input: (%%ld,%%ld,%%ld,%%ld,%%ld)", (long)%(W)s->dimensions[4], inputChannels, (long)%(V)s->dimensions[0], (long)%(V)s->dimensions[1], (long)%(V)s->dimensions[2], (long)%(V)s->dimensions[3], (long)%(V)s->dimensions[4]);
PyErr_Format(PyExc_ValueError, "Conv3D: W operates on a %%ld channel image but the image has %%d channels. Overall shape of input: (%%ld,%%ld,%%ld,%%ld,%%ld)", (long)PyArray_DIMS(%(W)s)[4], inputChannels, (long)PyArray_DIMS(%(V)s)[0], (long)PyArray_DIMS(%(V)s)[1], (long)PyArray_DIMS(%(V)s)[2], (long)PyArray_DIMS(%(V)s)[3], (long)PyArray_DIMS(%(V)s)[4]);
%(fail)s
}
if (%(b)s->dimensions[0] != outputChannels)
if (PyArray_DIMS(%(b)s)[0] != outputChannels)
{
PyErr_Format(PyExc_ValueError, "Conv3D: b adds to a(n) %%ld channel output image but the output has %%d channels", (long)%(b)s->dimensions[0], outputChannels);
PyErr_Format(PyExc_ValueError, "Conv3D: b adds to a(n) %%ld channel output image but the output has %%d channels", (long)PyArray_DIMS(%(b)s)[0], outputChannels);
%(fail)s
}
{ //extra scope so error handler jumps don't cause errors
const int filterHeight = %(W)s->dimensions[1];
const int filterWidth = %(W)s->dimensions[2];
const int filterDur = %(W)s->dimensions[3];
const int vidHeight = %(V)s->dimensions[1];
const int vidWidth = %(V)s->dimensions[2];
const int vidDur = %(V)s->dimensions[3];\
const int filterHeight = PyArray_DIMS(%(W)s)[1];
const int filterWidth = PyArray_DIMS(%(W)s)[2];
const int filterDur = PyArray_DIMS(%(W)s)[3];
const int vidHeight = PyArray_DIMS(%(V)s)[1];
const int vidWidth = PyArray_DIMS(%(V)s)[2];
const int vidDur = PyArray_DIMS(%(V)s)[3];\
if (vidHeight < filterHeight)
{
......@@ -291,13 +290,13 @@ class Conv3D(theano.Op):
if(!(%(H)s) || %(H)s->dimensions[0]!=dims[0] ||
%(H)s->dimensions[1]!=dims[1] ||
%(H)s->dimensions[2]!=dims[2] ||
%(H)s->dimensions[3]!=dims[3] ||
%(H)s->dimensions[4]!=dims[4]){
if(!(%(H)s) || PyArray_DIMS(%(H)s)[0]!=dims[0] ||
PyArray_DIMS(%(H)s)[1]!=dims[1] ||
PyArray_DIMS(%(H)s)[2]!=dims[2] ||
PyArray_DIMS(%(H)s)[3]!=dims[3] ||
PyArray_DIMS(%(H)s)[4]!=dims[4]){
Py_XDECREF(%(H)s);
%(H)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, %(V)s->descr->type_num);
%(H)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, PyArray_DESCR(%(V)s)->type_num);
if (!(%(H)s)) {
PyErr_Format(PyExc_MemoryError,"Conv3D: Could not allocate output.");
%(fail)s
......@@ -306,20 +305,20 @@ class Conv3D(theano.Op):
{ // extra scope so fail works
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) )
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i) )
const int ws0 = %(W)s->strides[0];
const int ws1 = %(W)s->strides[1];
const int ws2 = %(W)s->strides[2];
const int vs1 = %(V)s->strides[1];
const int ws4 = %(W)s->strides[4];
const int vs4 = %(V)s->strides[4];
const int ws3 = %(W)s->strides[3];
const int vs3 = %(V)s->strides[3];
const int vs2 = %(V)s->strides[2];
const int bs = %(b)s->strides[0];
const int hs4 = %(H)s->strides[4];
const int ws0 = PyArray_STRIDES(%(W)s)[0];
const int ws1 = PyArray_STRIDES(%(W)s)[1];
const int ws2 = PyArray_STRIDES(%(W)s)[2];
const int vs1 = PyArray_STRIDES(%(V)s)[1];
const int ws4 = PyArray_STRIDES(%(W)s)[4];
const int vs4 = PyArray_STRIDES(%(V)s)[4];
const int ws3 = PyArray_STRIDES(%(W)s)[3];
const int vs3 = PyArray_STRIDES(%(V)s)[3];
const int vs2 = PyArray_STRIDES(%(V)s)[2];
const int bs = PyArray_STRIDES(%(b)s)[0];
const int hs4 = PyArray_STRIDES(%(H)s)[4];
......@@ -425,20 +424,20 @@ class Conv3D(theano.Op):
Wpos = Wposl + ws2;
Vpos = Vposl + vs2;
} //close l
Wpos = Wposk + %(W)s->strides[1];
Vpos = Vposk + %(V)s->strides[1];
Wpos = Wposk + PyArray_STRIDES(%(W)s)[1];
Vpos = Vposk + PyArray_STRIDES(%(V)s)[1];
} //close k
Hpos = Hpost + %(H)s->strides[3];
Hpos = Hpost + PyArray_STRIDES(%(H)s)[3];
Vpos = Vpost + vs3 * dt;
} //close t
Hpos = Hposc + %(H)s->strides[2];
Hpos = Hposc + PyArray_STRIDES(%(H)s)[2];
Vpos = Vposc + vs2 * dc;
} //close c
Hpos = Hposr + %(H)s->strides[1];
Vpos = Vposr + %(V)s->strides[1] * dr;
Hpos = Hposr + PyArray_STRIDES(%(H)s)[1];
Vpos = Vposr + PyArray_STRIDES(%(V)s)[1] * dr;
} //closes r
Hpos = Hposi + %(H)s->strides[0];
Vpos = Vposi + %(V)s->strides[0];
Hpos = Hposi + PyArray_STRIDES(%(H)s)[0];
Vpos = Vposi + PyArray_STRIDES(%(V)s)[0];
} //closes i
......@@ -516,8 +515,8 @@ class Conv3D(theano.Op):
Wpos = Wposl + ws2;
Vpos = Vposl + vs2;
} //close l
Wpos = Wposk + %(W)s->strides[1];
Vpos = Vposk + %(V)s->strides[1];
Wpos = Wposk + PyArray_STRIDES(%(W)s)[1];
Vpos = Vposk + PyArray_STRIDES(%(V)s)[1];
} //close k
......@@ -528,17 +527,17 @@ class Conv3D(theano.Op):
//std::cout << "incremented Wpos by " << ws0 << std::endl;
//std::cout << "incremented Hpos by " << hs4 << std::endl;
} //close j
Hpos = Hpost + %(H)s->strides[3];
Hpos = Hpost + PyArray_STRIDES(%(H)s)[3];
Vpos = Vpost + vs3 * dt;
} //close t
Hpos = Hposc + %(H)s->strides[2];
Hpos = Hposc + PyArray_STRIDES(%(H)s)[2];
Vpos = Vposc + vs2 * dc;
} //close c
Hpos = Hposr + %(H)s->strides[1];
Vpos = Vposr + %(V)s->strides[1] * dr;
Hpos = Hposr + PyArray_STRIDES(%(H)s)[1];
Vpos = Vposr + PyArray_STRIDES(%(V)s)[1] * dr;
} //closes r
Hpos = Hposi + %(H)s->strides[0];
Vpos = Vposi + %(V)s->strides[0];
Hpos = Hposi + PyArray_STRIDES(%(H)s)[0];
Vpos = Vposi + PyArray_STRIDES(%(V)s)[0];
} //closes i
} //closes general case code
}}}}}}} //extra scope so error handler jumps don't cross declarations
......
......@@ -85,10 +85,6 @@ class ConvGrad3D(theano.Op):
output_storage[0][0] = dCdW
def c_compile_args(self):
flags = ['-Werror']
return flags
def c_code(self, node, nodename, inputs, outputs, sub):
V, d, WShape, dCdH = inputs
fail = sub['fail']
......@@ -101,40 +97,40 @@ class ConvGrad3D(theano.Op):
//printf("\t\t\t\tConvGradW3D c code\\n");
//Check dimensionality of inputs
if (%(dCdH)s->nd != 5)
if (PyArray_NDIM(%(dCdH)s) != 5)
{
PyErr_Format(PyExc_ValueError, "ConvGrad3D: dCdH must be a 5 dimensional tensor");
%(fail)s
}
if (%(V)s->nd != 5)
if (PyArray_NDIM(%(V)s) != 5)
{
PyErr_Format(PyExc_ValueError, "ConvGrad3D: V must be a 5 dimensional tensor");
%(fail)s
}
if (%(WShape)s->nd != 1)
if (PyArray_NDIM(%(WShape)s) != 1)
{
PyErr_Format(PyExc_ValueError,"ConvGrad3D: WShape must be a vector.");
%(fail)s
}
if (%(d)s->nd != 1)
if (PyArray_NDIM(%(d)s) != 1)
{
PyErr_Format(PyExc_ValueError,"ConvGrad3D: d must be a vector.");
%(fail)s
}
if (%(d)s->dimensions[0] != 3)
if (PyArray_DIMS(%(d)s)[0] != 3)
{
PyErr_Format(PyExc_ValueError,"ConvGrad3D: 3 stride length arguments expected (row, col, time) but %%li were given", (long)%(d)s->dimensions[0]);
PyErr_Format(PyExc_ValueError,"ConvGrad3D: 3 stride length arguments expected (row, col, time) but %%li were given", (long)PyArray_DIMS(%(d)s)[0]);
%(fail)s
}
{ //extra scope so that fail will not jump over declarations
//Read and check sizes of inputs
const int batchSize = %(V)s->dimensions[0];
if (%(WShape)s->dimensions[0] != 5)
const int batchSize = PyArray_DIMS(%(V)s)[0];
if (PyArray_DIMS(%(WShape)s)[0] != 5)
{
PyErr_Format(PyExc_ValueError,"ConvGrad3D: WShape must specify a 5D shape");
%(fail)s
......@@ -146,9 +142,9 @@ class ConvGrad3D(theano.Op):
}
{ //extra scope so that fail will not jump over declarations
dtype_%(WShape)s * WShape = (dtype_%(WShape)s *) %(WShape)s->data;
dtype_%(WShape)s * WShape = (dtype_%(WShape)s *) PyArray_DATA(%(WShape)s);
const int outputChannels = WShape[0];
const int inputChannels = %(V)s->dimensions[4];
const int inputChannels = PyArray_DIMS(%(V)s)[4];
if (WShape[4] != inputChannels)
{
PyErr_Format(PyExc_ValueError, "ConvGrad3D: W operates on a %%i channel image but the image has %%i channels",(int) WShape[1],inputChannels);
......@@ -159,9 +155,9 @@ class ConvGrad3D(theano.Op):
const int filterHeight = WShape[1];
const int filterWidth = WShape[2];
const int filterDur = WShape[3];
const int vidHeight = %(V)s->dimensions[1];
const int vidWidth = %(V)s->dimensions[2];
const int vidDur = %(V)s->dimensions[3];
const int vidHeight = PyArray_DIMS(%(V)s)[1];
const int vidWidth = PyArray_DIMS(%(V)s)[2];
const int vidDur = PyArray_DIMS(%(V)s)[3];
if (vidHeight < filterHeight)
{
PyErr_Format(PyExc_ValueError, "ConvGrad3D: W has a height of %%i but V is only %%i pixels tall", filterHeight, vidHeight);
......@@ -197,13 +193,13 @@ class ConvGrad3D(theano.Op):
if (%(dCdH)s->dimensions[0] != batchSize ||
%(dCdH)s->dimensions[4] != outputChannels ||
%(dCdH)s->dimensions[1] != outputHeight ||
%(dCdH)s->dimensions[2] != outputWidth ||
%(dCdH)s->dimensions[3] != outputDur)
if (PyArray_DIMS(%(dCdH)s)[0] != batchSize ||
PyArray_DIMS(%(dCdH)s)[4] != outputChannels ||
PyArray_DIMS(%(dCdH)s)[1] != outputHeight ||
PyArray_DIMS(%(dCdH)s)[2] != outputWidth ||
PyArray_DIMS(%(dCdH)s)[3] != outputDur)
{
PyErr_Format(PyExc_ValueError, "dCdH is the wrong size, expected (%%i,%%i,%%i,%%i,%%i), got (%%li,%%li,%%li,%%li,%%li)", batchSize, outputHeight, outputWidth, outputDur, outputChannels, (long)%(dCdH)s->dimensions[0], (long)%(dCdH)s->dimensions[1], (long)%(dCdH)s->dimensions[2], (long)%(dCdH)s->dimensions[3], (long)%(dCdH)s->dimensions[4]);
PyErr_Format(PyExc_ValueError, "dCdH is the wrong size, expected (%%i,%%i,%%i,%%i,%%i), got (%%li,%%li,%%li,%%li,%%li)", batchSize, outputHeight, outputWidth, outputDur, outputChannels, (long)PyArray_DIMS(%(dCdH)s)[0], (long)PyArray_DIMS(%(dCdH)s)[1], (long)PyArray_DIMS(%(dCdH)s)[2], (long)PyArray_DIMS(%(dCdH)s)[3], (long)PyArray_DIMS(%(dCdH)s)[4]);
%(fail)s
}
{ // extra scope for fail
......@@ -215,13 +211,13 @@ class ConvGrad3D(theano.Op):
dims[2] = filterWidth;
dims[3] = filterDur;
if(!(%(dCdW)s) || %(dCdW)s->dimensions[0]!=dims[0] ||
%(dCdW)s->dimensions[1]!=dims[1] ||
%(dCdW)s->dimensions[2]!=dims[2] ||
%(dCdW)s->dimensions[3]!=dims[3] ||
%(dCdW)s->dimensions[4]!=dims[4] ){
if(!(%(dCdW)s) || PyArray_DIMS(%(dCdW)s)[0]!=dims[0] ||
PyArray_DIMS(%(dCdW)s)[1]!=dims[1] ||
PyArray_DIMS(%(dCdW)s)[2]!=dims[2] ||
PyArray_DIMS(%(dCdW)s)[3]!=dims[3] ||
PyArray_DIMS(%(dCdW)s)[4]!=dims[4] ){
Py_XDECREF(%(dCdW)s);
%(dCdW)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, %(V)s->descr->type_num);
%(dCdW)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, PyArray_DESCR(%(V)s)->type_num);
if (!(%(dCdW)s)) {
PyErr_Format(PyExc_MemoryError,"ConvGrad3D: Could not allocate dCdW");
......@@ -230,12 +226,12 @@ class ConvGrad3D(theano.Op):
}
{ //extra scope so fail works
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( x->data + (i)*x->strides[0]+(j)*x->strides[1]+(k)*x->strides[2]+(l)*x->strides[3]+(m)*x->strides[4] )
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( PyArray_DATA(x) + (i)*PyArray_STRIDES(x)[0]+(j)*PyArray_STRIDES(x)[1]+(k)*PyArray_STRIDES(x)[2]+(l)*PyArray_STRIDES(x)[3]+(m)*PyArray_STRIDES(x)[4] )
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) )
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i) )
const int dhs3 = %(dCdH)s->strides[3];
const int dtvs3 = dt * %(V)s->strides[3];
const int dhs3 = PyArray_STRIDES(%(dCdH)s)[3];
const int dtvs3 = dt * PyArray_STRIDES(%(V)s)[3];
// Compute dCdW
//TODO-- see if this can be made faster by using ELEM_AT instead of ELEM5
......@@ -253,8 +249,8 @@ class ConvGrad3D(theano.Op):
for (int i = 0; i < batchSize; i++) {
for (int p = 0; p < outputHeight; p++) {
for (int q = 0; q < outputWidth; q++) {
int Hpos = i * %(dCdH)s->strides[0] + j * %(dCdH)s->strides[4] + p * %(dCdH)s->strides[1] + q * %(dCdH)s->strides[2] ;
int Vpos = i * %(V)s->strides[0] + z * %(V)s->strides[4] + (dr * p+k) * %(V)s->strides[1] + (dc*q+l) * %(V)s->strides[2] + m * %(V)s->strides[3];
int Hpos = i * PyArray_STRIDES(%(dCdH)s)[0] + j * PyArray_STRIDES(%(dCdH)s)[4] + p * PyArray_STRIDES(%(dCdH)s)[1] + q * PyArray_STRIDES(%(dCdH)s)[2] ;
int Vpos = i * PyArray_STRIDES(%(V)s)[0] + z * PyArray_STRIDES(%(V)s)[4] + (dr * p+k) * PyArray_STRIDES(%(V)s)[1] + (dc*q+l) * PyArray_STRIDES(%(V)s)[2] + m * PyArray_STRIDES(%(V)s)[3];
for (int r = 0; r < outputDur; r++) {
writePos += ELEM5(%(dCdH)s,i,p,q,r,j) * ELEM5(%(V)s,i,dr*p+k,dc*q+l,dt*r+m,z);
......
......@@ -35,10 +35,6 @@ class ConvTransp3D(theano.Op):
return theano.Apply(self, inputs=[W_,b_,d_,H_, RShape_], outputs = [ T.TensorType(H_.dtype, (False,False,False,False,False))() ] )
def c_compile_args(self):
flags = ['-Werror']
return flags
def infer_shape(self, node, input_shapes):
W, b, d, H, RShape = node.inputs
W_shape, b_shape, d_shape, H_shape, RShape_shape = input_shapes
......@@ -103,34 +99,36 @@ class ConvTransp3D(theano.Op):
//printf("\t\t\t\tConvTransp3D c code\\n");
//Check dimensionality of inputs
if (%(H)s->nd != 5)
if (PyArray_NDIM(%(H)s) != 5)
{
PyErr_Format(PyExc_ValueError, "H must be a 5-D tensor but it is %%i-D",%(H)s->nd);
PyErr_Format(PyExc_ValueError,
"H must be a 5-D tensor but it is %%i-D",
PyArray_NDIM(%(H)s));
%(fail)s
}
if (%(W)s->nd != 5)
if (PyArray_NDIM(%(W)s) != 5)
{
PyErr_Format(PyExc_ValueError, "ConvTransp3D: W must be a 5-D tensor");
%(fail)s
}
if (%(b)s->nd != 1)
if (PyArray_NDIM(%(b)s) != 1)
{
PyErr_Format(PyExc_ValueError, "ConvTransp3D: b must be a vector");
%(fail)s
}
if (%(d)s->nd != 1)
if (PyArray_NDIM(%(d)s) != 1)
{
PyErr_Format(PyExc_ValueError, "ConvTransp3D: d must be a vector");
%(fail)s
}
//Read and check stride arguments
if (%(d)s->dimensions[0] != 3)
if (PyArray_DIMS(%(d)s)[0] != 3)
{
PyErr_Format(PyExc_ValueError, "ConvTransp3D: 3 stride length arguments expected (for row, col, and time) but %%li were given", (long)%(d)s->dimensions[0] );
PyErr_Format(PyExc_ValueError, "ConvTransp3D: 3 stride length arguments expected (for row, col, and time) but %%li were given", (long)PyArray_DIMS(%(d)s)[0] );
%(fail)s
}
......@@ -149,33 +147,33 @@ class ConvTransp3D(theano.Op):
//Read and check sizes of inputs
{ // for fail 2
const int batchSize = %(H)s->dimensions[0];
const int outputChannels = %(W)s->dimensions[0];
const int batchSize = PyArray_DIMS(%(H)s)[0];
const int outputChannels = PyArray_DIMS(%(W)s)[0];
if (%(H)s->dimensions[4] != outputChannels)
if (PyArray_DIMS(%(H)s)[4] != outputChannels)
{
PyErr_Format(PyExc_ValueError, "W produces a %%i channel image but the image has %%li channels. W.shape: (%%li, %%li, %%li, %%li, %%li) H.shape: (%%li, %%li, %%li, %%li, %%li)", outputChannels, (long)%(H)s->dimensions[4], (long)%(W)s->dimensions[0], (long)%(W)s->dimensions[1], (long)%(W)s->dimensions[2], (long)%(W)s->dimensions[3], (long)%(W)s->dimensions[4], (long)%(H)s->dimensions[0], (long)%(H)s->dimensions[1], (long)%(H)s->dimensions[2], (long)%(H)s->dimensions[3], (long)%(H)s->dimensions[4]);
PyErr_Format(PyExc_ValueError, "W produces a %%i channel image but the image has %%li channels. W.shape: (%%li, %%li, %%li, %%li, %%li) H.shape: (%%li, %%li, %%li, %%li, %%li)", outputChannels, (long)PyArray_DIMS(%(H)s)[4], (long)PyArray_DIMS(%(W)s)[0], (long)PyArray_DIMS(%(W)s)[1], (long)PyArray_DIMS(%(W)s)[2], (long)PyArray_DIMS(%(W)s)[3], (long)PyArray_DIMS(%(W)s)[4], (long)PyArray_DIMS(%(H)s)[0], (long)PyArray_DIMS(%(H)s)[1], (long)PyArray_DIMS(%(H)s)[2], (long)PyArray_DIMS(%(H)s)[3], (long)PyArray_DIMS(%(H)s)[4]);
%(fail)s
}
{ // for fail 3
const int inputChannels = %(W)s->dimensions[4];
const int inputChannels = PyArray_DIMS(%(W)s)[4];
if (%(b)s->dimensions[0] != inputChannels)
if (PyArray_DIMS(%(b)s)[0] != inputChannels)
{
PyErr_Format(PyExc_ValueError, "ConvTransp3D: b operates on a %%li channel image but the image has %%i channels", (long)%(b)s->dimensions[0], inputChannels );
PyErr_Format(PyExc_ValueError, "ConvTransp3D: b operates on a %%li channel image but the image has %%i channels", (long)PyArray_DIMS(%(b)s)[0], inputChannels );
%(fail)s
}
{ // for fail 4
const int filterHeight = %(W)s->dimensions[1];
const int filterWidth = %(W)s->dimensions[2];
const int filterDur = %(W)s->dimensions[3];
const int outputHeight = %(H)s->dimensions[1];
const int outputWidth = %(H)s->dimensions[2];
const int outputDur = %(H)s->dimensions[3];
const int filterHeight = PyArray_DIMS(%(W)s)[1];
const int filterWidth = PyArray_DIMS(%(W)s)[2];
const int filterDur = PyArray_DIMS(%(W)s)[3];
const int outputHeight = PyArray_DIMS(%(H)s)[1];
const int outputWidth = PyArray_DIMS(%(H)s)[2];
const int outputDur = PyArray_DIMS(%(H)s)[3];
int videoHeight = (outputHeight-1) * dr + filterHeight;
int videoWidth = (outputWidth-1) * dc + filterWidth;
......@@ -183,13 +181,13 @@ class ConvTransp3D(theano.Op):
if (%(RShape)s)
{
if (%(RShape)s->nd != 1)
if (PyArray_NDIM(%(RShape)s) != 1)
{
PyErr_Format(PyExc_ValueError, "ConvTransp3D: RShape must be a vector");
%(fail)s
}
if (%(RShape)s->dimensions[0] != 3)
if (PyArray_DIMS(%(RShape)s)[0] != 3)
{
PyErr_Format(PyExc_ValueError, "RShape must specify a 3D shape ( [height,width,duration] )");
%(fail)s
......@@ -223,14 +221,14 @@ class ConvTransp3D(theano.Op):
dims[2] = videoWidth;
dims[3] = videoDur;
if(!(%(R)s) || %(R)s->dimensions[0]!=dims[0] ||
%(R)s->dimensions[1]!=dims[1] ||
%(R)s->dimensions[2]!=dims[2] ||
%(R)s->dimensions[3]!=dims[3] ||
%(R)s->dimensions[4]!=dims[4])
if(!(%(R)s) || PyArray_DIMS(%(R)s)[0]!=dims[0] ||
PyArray_DIMS(%(R)s)[1]!=dims[1] ||
PyArray_DIMS(%(R)s)[2]!=dims[2] ||
PyArray_DIMS(%(R)s)[3]!=dims[3] ||
PyArray_DIMS(%(R)s)[4]!=dims[4])
{
Py_XDECREF(%(R)s);
%(R)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, %(H)s->descr->type_num);
%(R)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, PyArray_DESCR(%(H)s)->type_num);
if (!(%(R)s)) {
PyErr_Format(PyExc_MemoryError, "ConvTransp3D: could not allocate R");
%(fail)s
......@@ -239,17 +237,17 @@ class ConvTransp3D(theano.Op):
{ // for fail 6
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( x->data + (i)*x->strides[0]+(j)*x->strides[1]+(k)*x->strides[2]+(l)*x->strides[3]+(m)*x->strides[4] )
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) )
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( PyArray_DATA(x) + (i)*PyArray_STRIDES(x)[0]+(j)*PyArray_STRIDES(x)[1]+(k)*PyArray_STRIDES(x)[2]+(l)*PyArray_STRIDES(x)[3]+(m)*PyArray_STRIDES(x)[4] )
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i) )
dtype_%(b)s * b = (dtype_%(b)s *) %(b)s->data;
dtype_%(b)s * b = (dtype_%(b)s *) PyArray_DATA(%(b)s);
int rs4 = %(R)s->strides[4];
int ws0 = %(W)s->strides[0];
int ws4 = %(W)s->strides[4];
int hs4 = %(H)s->strides[4];
int rs4 = PyArray_STRIDES(%(R)s)[4];
int ws0 = PyArray_STRIDES(%(W)s)[0];
int ws4 = PyArray_STRIDES(%(W)s)[4];
int hs4 = PyArray_STRIDES(%(H)s)[4];
// Compute R
// R[i,r,c,t,j] = b_j + sum_{rc,rk | d \circ rc + rk = r} sum_{cc,ck | ...} sum_{tc,tk | ...} sum_k W[k, rk, ck, tk,j] * H[i,rc,cc,tc,k]
......@@ -262,7 +260,7 @@ class ConvTransp3D(theano.Op):
for (int t = 0; t < videoDur; t++) {
const int ftc = (int)std::max(0.0f, ceilf(float(t-filterDur +1) /float(dt)));
long long Rpost = i * %(R)s->strides[0] + r * %(R)s->strides[1] + c * %(R)s->strides[2] + t * %(R)s->strides[3];
long long Rpost = i * PyArray_STRIDES(%(R)s)[0] + r * PyArray_STRIDES(%(R)s)[1] + c * PyArray_STRIDES(%(R)s)[2] + t * PyArray_STRIDES(%(R)s)[3];
long long Rpos = Rpost;
for (int j = 0; j < inputChannels; j++)
......@@ -286,8 +284,8 @@ class ConvTransp3D(theano.Op):
const int tk = t - tc * dt;
if (tk < 0) break;
int Wpos = rk * %(W)s->strides[1] + ck * %(W)s->strides[2] + tk * %(W)s->strides[3];
int Hpostc = i * %(H)s->strides[0] + rc * %(H)s->strides[1] + cc * %(H)s->strides[2] + tc * %(H)s->strides[3];
int Wpos = rk * PyArray_STRIDES(%(W)s)[1] + ck * PyArray_STRIDES(%(W)s)[2] + tk * PyArray_STRIDES(%(W)s)[3];
int Hpostc = i * PyArray_STRIDES(%(H)s)[0] + rc * PyArray_STRIDES(%(H)s)[1] + cc * PyArray_STRIDES(%(H)s)[2] + tc * PyArray_STRIDES(%(H)s)[3];
Rpos = Rpost;
for (int j = 0; j < inputChannels; j++)
{
......
......@@ -970,7 +970,7 @@ class ConvOp(OpenMPOp):
def c_support_code(self):
return """
#define STRIDES(arr) ((arr)->strides)
#define STRIDES(arr) (PyArray_STRIDES(arr))
#define FULL 2
#define SAME 1
#define VALID 0
......@@ -1159,15 +1159,15 @@ if(kerns_dim[3] %% %(self_kshp1)s!=0){
""" % (locals())
else:
d["self_bsize"] = "%(img2d)s->dimensions[0]" % d
d["self_nkern"] = "%(filtersflipped)s->dimensions[0]" % d
d["self_bsize"] = "PyArray_DIMS(%(img2d)s)[0]" % d
d["self_nkern"] = "PyArray_DIMS(%(filtersflipped)s)[0]" % d
d["self_outshp0"] = "-1"
d["self_outshp1"] = "-1"
d["self_imshp0"] = "%(img2d)s->dimensions[1]" % d
d["self_imshp1"] = "%(img2d)s->dimensions[2]" % d
d["self_imshp2"] = "%(img2d)s->dimensions[3]" % d
d["self_kshp0"] = "%(filtersflipped)s->dimensions[2]" % d
d["self_kshp1"] = "%(filtersflipped)s->dimensions[3]" % d
d["self_imshp0"] = "PyArray_DIMS(%(img2d)s)[1]" % d
d["self_imshp1"] = "PyArray_DIMS(%(img2d)s)[2]" % d
d["self_imshp2"] = "PyArray_DIMS(%(img2d)s)[3]" % d
d["self_kshp0"] = "PyArray_DIMS(%(filtersflipped)s)[2]" % d
d["self_kshp1"] = "PyArray_DIMS(%(filtersflipped)s)[3]" % d
d["affectation"] = "+="
d["all_shape"] = "0"
d["dim_zz_const"] = ""
......@@ -1242,7 +1242,7 @@ _conv_op_code_a = """
const int mode=%(mode)s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL;
PyArrayObject *filtersflipped_arr=NULL, *img2d_arr=NULL;
PyArrayObject *filtersflipped_arr=NULL, *img2d_arr=NULL, *z_arr=NULL;
const %(type)s fill_value = 0;
int type_im=PyArray_TYPE(%(img2d)s);
......@@ -1266,35 +1266,35 @@ kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
if(%(img2d)s->nd==2){
img2d_dim[3]=%(img2d)s->dimensions[1];
img2d_dim[2]=%(img2d)s->dimensions[0];
}else if(%(img2d)s->nd==3){
img2d_dim[3]=%(img2d)s->dimensions[2];
img2d_dim[2]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
}else if(%(img2d)s->nd==4){
img2d_dim[3]=%(img2d)s->dimensions[3];
img2d_dim[2]=%(img2d)s->dimensions[2];
img2d_dim[1]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
if(PyArray_NDIM(%(img2d)s)==2){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==3){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==4){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[3];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[1]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
%(fail)s;
}
if(%(filtersflipped)s->nd==3){
kerns_dim[3]=%(filtersflipped)s->dimensions[2];
kerns_dim[2]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else if(%(filtersflipped)s->nd==4){
kerns_dim[3]=%(filtersflipped)s->dimensions[3];
kerns_dim[2]=%(filtersflipped)s->dimensions[2];
kerns_dim[1]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
if(PyArray_NDIM(%(filtersflipped)s)==3){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else if(PyArray_NDIM(%(filtersflipped)s)==4){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[3];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[1]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else{
std::stringstream temp;
temp << "nddim="<<%(filtersflipped)s->nd;
temp << "nddim="<<PyArray_NDIM(%(filtersflipped)s);
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
("kernel don't have a good shape. " + param).c_str());
......@@ -1303,33 +1303,33 @@ if(%(filtersflipped)s->nd==3){
%(assert_size)s
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER);
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){
if ((PyArray_STRIDES(img2d_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s;
}
}
img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, PyArray_CORDER);
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, NPY_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){
if ((PyArray_STRIDES(filtersflipped_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(filtersflipped_arr)[2] != PyArray_DIMS(filtersflipped_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if (!PyArray_ISCONTIGUOUS(filtersflipped_arr)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)s;
}
}
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError,
......@@ -1348,10 +1348,10 @@ if (!img2d) %(fail)s;
if (!filtersflipped) %(fail)s;
if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(%(z)s->dimensions[0] != %(self_bsize)s)
||(%(z)s->dimensions[1] != %(self_nkern)s)
||(%(z)s->dimensions[2] != dim_zz[0])
|| (%(z)s->dimensions[3] != dim_zz[1])
||(PyArray_DIMS(%(z)s)[0] != %(self_bsize)s)
||(PyArray_DIMS(%(z)s)[1] != %(self_nkern)s)
||(PyArray_DIMS(%(z)s)[2] != dim_zz[0])
|| (PyArray_DIMS(%(z)s)[3] != dim_zz[1])
)
{
{Py_XDECREF(%(z)s);}
......@@ -1364,36 +1364,37 @@ if ((!%(z)s)
}else{
//PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
z_arr = (PyArrayObject*) %(z)s;
int Os[2];
Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s;
//assertions
if (%(z)s->strides[0] != %(z)s->dimensions[1] *
%(z)s->dimensions[2] *
%(z)s->dimensions[3] *
if (PyArray_STRIDES(%(z)s)[0] != PyArray_DIMS(%(z)s)[1] *
PyArray_DIMS(%(z)s)[2] *
PyArray_DIMS(%(z)s)[3] *
(npy_intp)sizeof(%(type)s))
%(fail)s;
if (%(z)s->strides[1] != %(z)s->dimensions[2] *
%(z)s->dimensions[3] *
if (PyArray_STRIDES(%(z)s)[1] != PyArray_DIMS(%(z)s)[2] *
PyArray_DIMS(%(z)s)[3] *
(npy_intp)sizeof(%(type)s))
%(fail)s;
if (%(z)s->strides[2] != %(z)s->dimensions[3] * (npy_intp)sizeof(%(type)s))
if (PyArray_STRIDES(%(z)s)[2] != PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s))
%(fail)s;
if (%(z)s->strides[3] != (npy_intp)sizeof(%(type)s))
if (PyArray_STRIDES(%(z)s)[3] != (npy_intp)sizeof(%(type)s))
%(fail)s;
for(int b=0;b< %(self_bsize)s;b++){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){
%(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(%(z)s,b,n_kern));
%(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(z_arr,b,n_kern));
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0;
for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d,b,stack_size));
const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped,n_kern,stack_size));
const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d_arr,b,stack_size));
const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped_arr,n_kern,stack_size));
for (int iter_m=0; iter_m < Os[0]; iter_m++) {
......@@ -1514,7 +1515,7 @@ Py_XDECREF(filtersflipped);
_conv_op_code_valid_gemm = """
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *img2d_arr=NULL;
PyArrayObject *ain1=NULL, *ain2=NULL, *img2d_arr=NULL, *z_arr=NULL;
const int NKERN = %(self_nkern)s;
int type_im=PyArray_TYPE(%(img2d)s);
......@@ -1536,35 +1537,35 @@ kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig;
if(%(img2d)s->nd==2){
img2d_dim[3]=%(img2d)s->dimensions[1];
img2d_dim[2]=%(img2d)s->dimensions[0];
}else if(%(img2d)s->nd==3){
img2d_dim[3]=%(img2d)s->dimensions[2];
img2d_dim[2]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
}else if(%(img2d)s->nd==4){
img2d_dim[3]=%(img2d)s->dimensions[3];
img2d_dim[2]=%(img2d)s->dimensions[2];
img2d_dim[1]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
if(PyArray_NDIM(%(img2d)s)==2){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==3){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==4){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[3];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[1]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
%(fail)s;
}
if(%(filtersflipped)s->nd==3){
kerns_dim[3]=%(filtersflipped)s->dimensions[2];
kerns_dim[2]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else if(%(filtersflipped)s->nd==4){
kerns_dim[3]=%(filtersflipped)s->dimensions[3];
kerns_dim[2]=%(filtersflipped)s->dimensions[2];
kerns_dim[1]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
if(PyArray_NDIM(%(filtersflipped)s)==3){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else if(PyArray_NDIM(%(filtersflipped)s)==4){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[3];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[1]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else{
std::stringstream temp;
temp << "nddim="<<%(filtersflipped)s->nd;
temp << "nddim="<<PyArray_NDIM(%(filtersflipped)s);
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
("kernel don't have a good shape. " + param).c_str());
......@@ -1576,14 +1577,14 @@ if (NKERN != kerns_dim[0])
%(fail)s;
}
// Reshape the image to the layout described by img2d_shape, then make sure its two
// innermost axes are densely packed, swapping in a contiguous copy when they are not.
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER);
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){
if ((PyArray_STRIDES(img2d_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
// img2d_arr still refers to the reshaped array released just above; point it at
// the contiguous copy before inspecting it.
img2d_arr = (PyArrayObject*)img2d;
if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s;
}
......@@ -1601,10 +1602,10 @@ if (!img2d) {
}
if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(%(z)s->dimensions[0] != %(self_bsize)s)
||(%(z)s->dimensions[1] != %(self_nkern)s)
||(%(z)s->dimensions[2] != dim_zz[0])
|| (%(z)s->dimensions[3] != dim_zz[1])
||(PyArray_DIMS(%(z)s)[0] != %(self_bsize)s)
||(PyArray_DIMS(%(z)s)[1] != %(self_nkern)s)
||(PyArray_DIMS(%(z)s)[2] != dim_zz[0])
|| (PyArray_DIMS(%(z)s)[3] != dim_zz[1])
)
{
{Py_XDECREF(%(z)s);}
......@@ -1617,6 +1618,7 @@ if ((!%(z)s)
}else{
PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
z_arr = (PyArrayObject*) %(z)s;
%(assert_size)s
......@@ -1637,7 +1639,7 @@ for(int i=0;i < kerns_dim[0];++i){
for(int j=0;j < kerns_dim[1];++j){
for(int k=0;k < kerns_dim[2];++k){
for(int l=0;l < kerns_dim[3];++l){
%(type)s * ff = ((%(filtersflipped)s)->nd == 3)
%(type)s * ff = ((PyArray_NDIM(%(filtersflipped)s)) == 3)
? (%(type)s *)PyArray_GETPTR3(%(filtersflipped)s, i, kerns_dim[2]-1-k, kerns_dim[3]-1-l)
: (%(type)s *)PyArray_GETPTR4(%(filtersflipped)s, i, j, kerns_dim[2]-1-k, kerns_dim[3]-1-l);
myfilters[i * (kerns_dim[1]*kerns_dim[2]*kerns_dim[3])
......@@ -1672,7 +1674,7 @@ for(int b=0;b< %(self_bsize)s;b++){
int imgview_stride = dim_im[1];
int filter_rows_stride =kerns_dim[1]*kerns_dim[2]*kerns_dim[3];
//remember, Fortran wants a column-major interpretation
assert(img2d->strides[3] == (npy_intp)sizeof(%(type)s));
// img2d is declared as a generic PyObject pointer in this template; cast it for the typed stride accessor.
assert(PyArray_STRIDES((PyArrayObject*)img2d)[3] == (npy_intp)sizeof(%(type)s));
if (0){
std::cerr << "b " << b << " img_col " << img_col << " filterrow " << filter_row << " stackidx " <<stackidx << "\\n";
......@@ -1717,10 +1719,10 @@ for(int b=0;b< %(self_bsize)s;b++){
%(type)s * z_p = (%(type)s *)PyArray_GETPTR4(%(z)s, b, kernel_idx, img_row, img_col);
if (0)
{
if (b >= %(z)s->dimensions[0]) %(fail)s;
if (kernel_idx >= %(z)s->dimensions[1]) %(fail)s;
if (img_row >= %(z)s->dimensions[2]) %(fail)s;
if (img_col >= %(z)s->dimensions[3]) %(fail)s;
if (b >= PyArray_DIMS(%(z)s)[0]) %(fail)s;
if (kernel_idx >= PyArray_DIMS(%(z)s)[1]) %(fail)s;
if (img_row >= PyArray_DIMS(%(z)s)[2]) %(fail)s;
if (img_col >= PyArray_DIMS(%(z)s)[3]) %(fail)s;
}
z_p[0] += kbuf[img_row * kbufstride + kernel_idx];
}
......@@ -1766,7 +1768,7 @@ def gen_conv_code_unroll_batch_kern(d, unroll_bsize=1, unroll_ksize=1):
ret = """
const int mode=%(mode)s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL, *z_arr=NULL;;
const %(type)s fill_value = 0;
int type_im=PyArray_TYPE(%(img2d)s);
......@@ -1788,36 +1790,36 @@ kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
if(%(img2d)s->nd==2){
img2d_dim[3]=%(img2d)s->dimensions[1];
img2d_dim[2]=%(img2d)s->dimensions[0];
}else if(%(img2d)s->nd==3){
img2d_dim[3]=%(img2d)s->dimensions[2];
img2d_dim[2]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
}else if(%(img2d)s->nd==4){
img2d_dim[3]=%(img2d)s->dimensions[3];
img2d_dim[2]=%(img2d)s->dimensions[2];
img2d_dim[1]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
if(PyArray_NDIM(%(img2d)s)==2){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==3){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==4){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[3];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[1]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else {
std::stringstream temp;
temp << "nddim="<<%(img2d)s->nd;
temp << "nddim="<<PyArray_NDIM(%(img2d)s);
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
("img don't have a good shape. " + param).c_str());
%(fail)s;
}
if(%(filtersflipped)s->nd==3){
kerns_dim[3]=%(filtersflipped)s->dimensions[2];
kerns_dim[2]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else if(%(filtersflipped)s->nd==4){
kerns_dim[3]=%(filtersflipped)s->dimensions[3];
kerns_dim[2]=%(filtersflipped)s->dimensions[2];
kerns_dim[1]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
if(PyArray_NDIM(%(filtersflipped)s)==3){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else if(PyArray_NDIM(%(filtersflipped)s)==4){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[3];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[1]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else{
PyErr_SetString(PyExc_ValueError, "kernel don't have a good shape");
%(fail)s;
......@@ -1825,33 +1827,33 @@ if(%(filtersflipped)s->nd==3){
%(assert_size)s
// Reshape the image to the layout described by img2d_shape, then make sure its two
// innermost axes are densely packed, swapping in a contiguous copy when they are not.
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER);
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){
if ((PyArray_STRIDES(img2d_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
// img2d_arr still refers to the reshaped array released just above; point it at
// the contiguous copy before inspecting it.
img2d_arr = (PyArrayObject*)img2d;
if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s;
}
}
img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, PyArray_CORDER);
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, NPY_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*(npy_intp)sizeof(%(type)s))){
if ((PyArray_STRIDES(filtersflipped_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(filtersflipped_arr)[2] != PyArray_DIMS(filtersflipped_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if (!PyArray_ISCONTIGUOUS(filtersflipped_arr)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)s;
}
}
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s;
......@@ -1865,10 +1867,10 @@ if (!img2d) %(fail)s;
if (!filtersflipped) %(fail)s;
if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(%(z)s->dimensions[0] != %(self_bsize)s)
||(%(z)s->dimensions[1] != %(self_nkern)s)
||(%(z)s->dimensions[2] != dim_zz[0])
|| (%(z)s->dimensions[3] != dim_zz[1])
||(PyArray_DIMS(%(z)s)[0] != %(self_bsize)s)
||(PyArray_DIMS(%(z)s)[1] != %(self_nkern)s)
||(PyArray_DIMS(%(z)s)[2] != dim_zz[0])
|| (PyArray_DIMS(%(z)s)[3] != dim_zz[1])
)
{
{Py_XDECREF(%(z)s);}
......@@ -1881,28 +1883,29 @@ if ((!%(z)s)
}else{
//PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
z_arr = (PyArrayObject*) %(z)s;
int Os[2];
Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s;
//assertions
if (%(z)s->strides[0] != %(z)s->dimensions[1] *%(z)s->dimensions[2] *%(z)s->dimensions[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[1] != %(z)s->dimensions[2] * %(z)s->dimensions[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[2] != %(z)s->dimensions[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[3] != (npy_intp)sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[0] != PyArray_DIMS(%(z)s)[1] *PyArray_DIMS(%(z)s)[2] *PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[1] != PyArray_DIMS(%(z)s)[2] * PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[2] != PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[3] != (npy_intp)sizeof(%(type)s)) %(fail)s;
for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){
""" % d
ret += my_dup2("%(type)s * __restrict__ out%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(%(z)s,b+%(unroll_biter)s,n_kern+%(unroll_kiter)s));")
ret += my_dup2("%(type)s * __restrict__ out%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(z_arr,b+%(unroll_biter)s,n_kern+%(unroll_kiter)s));")
ret += my_dup("for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out%(unroll_iter)s[i] = 0;", unroll_bsize * unroll_ksize)
ret += """
for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
""" % d
ret += my_dup("const %(type)s * __restrict__ in%(unroll_iter)d=(%(type)s *)(PyArray_GETPTR2(img2d,b+%(unroll_iter)s,stack_size));", unroll_bsize)
ret += my_dup("const %(type)s * __restrict__ hvals%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(filtersflipped,n_kern+%(unroll_iter)s,stack_size));", unroll_ksize)
ret += my_dup("const %(type)s * __restrict__ in%(unroll_iter)d=(%(type)s *)(PyArray_GETPTR2(img2d_arr,b+%(unroll_iter)s,stack_size));", unroll_bsize)
ret += my_dup("const %(type)s * __restrict__ hvals%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(filtersflipped_arr,n_kern+%(unroll_iter)s,stack_size));", unroll_ksize)
ret += """
int new_m;
......@@ -1999,7 +2002,7 @@ Py_XDECREF(filtersflipped);
_conv_op_code_unroll_patch = """
const int mode=%(mode)s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL, *z_arr=NULL;
const %(type)s fill_value = 0;//only value of 0 are currently tested and correctly implemented
int type_im=PyArray_TYPE(%(img2d)s);
......@@ -2025,68 +2028,68 @@ kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
if(%(img2d)s->nd==2){
img2d_dim[3]=%(img2d)s->dimensions[1];
img2d_dim[2]=%(img2d)s->dimensions[0];
}else if(%(img2d)s->nd==3){
img2d_dim[3]=%(img2d)s->dimensions[2];
img2d_dim[2]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
}else if(%(img2d)s->nd==4){
img2d_dim[3]=%(img2d)s->dimensions[3];
img2d_dim[2]=%(img2d)s->dimensions[2];
img2d_dim[1]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
if(PyArray_NDIM(%(img2d)s)==2){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==3){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else if(PyArray_NDIM(%(img2d)s)==4){
img2d_dim[3]=PyArray_DIMS(%(img2d)s)[3];
img2d_dim[2]=PyArray_DIMS(%(img2d)s)[2];
img2d_dim[1]=PyArray_DIMS(%(img2d)s)[1];
img2d_dim[0]=PyArray_DIMS(%(img2d)s)[0];
}else {
// This branch rejected the image input, so report the image's rank rather than the kernel's.
PyErr_Format(PyExc_ValueError,
"image don't have a good number of dimensions %%d. ", %(filtersflipped)s->nd);
"image don't have a good number of dimensions %%d. ", PyArray_NDIM(%(img2d)s));
%(fail)s;
}
if(%(filtersflipped)s->nd==3){
kerns_dim[3]=%(filtersflipped)s->dimensions[2];
kerns_dim[2]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else if(%(filtersflipped)s->nd==4){
kerns_dim[3]=%(filtersflipped)s->dimensions[3];
kerns_dim[2]=%(filtersflipped)s->dimensions[2];
kerns_dim[1]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
if(PyArray_NDIM(%(filtersflipped)s)==3){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else if(PyArray_NDIM(%(filtersflipped)s)==4){
kerns_dim[3]=PyArray_DIMS(%(filtersflipped)s)[3];
kerns_dim[2]=PyArray_DIMS(%(filtersflipped)s)[2];
kerns_dim[1]=PyArray_DIMS(%(filtersflipped)s)[1];
kerns_dim[0]=PyArray_DIMS(%(filtersflipped)s)[0];
}else{
PyErr_Format(PyExc_ValueError,
"kernel don't have a good number of dimensions %%d. ", %(filtersflipped)s->nd);
"kernel don't have a good number of dimensions %%d. ", PyArray_NDIM(%(filtersflipped)s));
%(fail)s;
}
%(assert_size)s
// Reshape the image to the layout described by img2d_shape, then make sure its two
// innermost axes are densely packed, swapping in a contiguous copy when they are not.
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER);
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(%(type)s))){
if ((PyArray_STRIDES(img2d_arr)[3] != sizeof(%(type)s))
|| (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
// img2d_arr still refers to the reshaped array released just above; point it at
// the contiguous copy before inspecting it.
img2d_arr = (PyArrayObject*)img2d;
if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s;
}
}
img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, PyArray_CORDER);
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, NPY_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*sizeof(%(type)s))){
if ((PyArray_STRIDES(filtersflipped_arr)[3] != sizeof(%(type)s))
|| (PyArray_STRIDES(filtersflipped_arr)[2] != PyArray_DIMS(filtersflipped_arr)[3]*sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if (!PyArray_ISCONTIGUOUS(filtersflipped_arr)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)s;
}
}
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s;
......@@ -2107,10 +2110,10 @@ if (!img2d) %(fail)s;
if (!filtersflipped) %(fail)s;
if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(%(z)s->dimensions[0] != %(self_bsize)s)
||(%(z)s->dimensions[1] != %(self_nkern)s)
||(%(z)s->dimensions[2] != dim_zz[0])
|| (%(z)s->dimensions[3] != dim_zz[1])
||(PyArray_DIMS(%(z)s)[0] != %(self_bsize)s)
||(PyArray_DIMS(%(z)s)[1] != %(self_nkern)s)
||(PyArray_DIMS(%(z)s)[2] != dim_zz[0])
|| (PyArray_DIMS(%(z)s)[3] != dim_zz[1])
)
{
if (%(z)s) Py_DECREF(%(z)s);
......@@ -2124,12 +2127,13 @@ if ((!%(z)s)
}else{
//PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
z_arr = (PyArrayObject*) %(z)s;
//assertions
if (%(z)s->strides[0] != %(z)s->dimensions[1] *%(z)s->dimensions[2] *%(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[1] != %(z)s->dimensions[2] * %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[2] != %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[3] != sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[0] != PyArray_DIMS(%(z)s)[1] *PyArray_DIMS(%(z)s)[2] *PyArray_DIMS(%(z)s)[3] * sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[1] != PyArray_DIMS(%(z)s)[2] * PyArray_DIMS(%(z)s)[3] * sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[2] != PyArray_DIMS(%(z)s)[3] * sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[3] != sizeof(%(type)s)) %(fail)s;
//The if on the number of loop make a speed up for small array.
//with g++ 4.5.1. The compiler should be smart enough to do this himself!
......@@ -2144,13 +2148,13 @@ for(int batch_kern_idx=0;
int b = batch_kern_idx / %(self_nkern)s;
int n_kern = batch_kern_idx %% %(self_nkern)s;
%(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(%(z)s,b,n_kern));
%(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(z_arr,b,n_kern));
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0;
for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d,b,stack_size));
const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped,n_kern,stack_size));
const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d_arr,b,stack_size));
const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped_arr,n_kern,stack_size));
int new_m;
......
......@@ -105,41 +105,41 @@ class SoftmaxWithBias(gof.Op):
#TODO: use this to accept float32 and int32: node.inputs[0].type.dtype_specs()[1]
init_decl = """
npy_intp* Nx = %(x)s->dimensions;
npy_intp* Nx = PyArray_DIMS(%(x)s);
if (%(x)s->nd != 2)
if (PyArray_NDIM(%(x)s) != 2)
{
PyErr_SetString(PyExc_ValueError, "a not 2d tensor");
%(fail)s;
}
if (%(b)s->nd != 1)
if (PyArray_NDIM(%(b)s) != 1)
{
PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
%(fail)s;
}
if ((%(x)s->descr->type_num != PyArray_DOUBLE) &&
(%(x)s->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(%(x)s)->type_num != NPY_DOUBLE) &&
(PyArray_DESCR(%(x)s)->type_num != NPY_FLOAT))
{
PyErr_SetString(PyExc_TypeError, "a not float");
%(fail)s;
}
if ((%(b)s->descr->type_num != PyArray_DOUBLE) &&
(%(b)s->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(%(b)s)->type_num != NPY_DOUBLE) &&
(PyArray_DESCR(%(b)s)->type_num != NPY_FLOAT))
{
PyErr_SetString(PyExc_TypeError, "b not float");
%(fail)s;
}
if ((%(x)s->dimensions[1] != %(b)s->dimensions[0]))
if ((PyArray_DIMS(%(x)s)[1] != PyArray_DIMS(%(b)s)[0]))
{
PyErr_Format(PyExc_ValueError,
"number of columns in x (%%ld) does not match length of b (%%ld)",
(long int)%(x)s->dimensions[1], (long int)%(b)s->dimensions[0]);
(long int)PyArray_DIMS(%(x)s)[1], (long int)PyArray_DIMS(%(b)s)[0]);
%(fail)s;
}
if ((NULL == %(sm)s)
|| (%(sm)s->dimensions[0] != %(x)s->dimensions[0])
|| (%(sm)s->dimensions[1] != %(x)s->dimensions[1]))
|| (PyArray_DIMS(%(sm)s)[0] != PyArray_DIMS(%(x)s)[0])
|| (PyArray_DIMS(%(sm)s)[1] != PyArray_DIMS(%(x)s)[1]))
{
if (NULL != %(sm)s) Py_XDECREF(%(sm)s);
%(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s),
......@@ -159,15 +159,15 @@ class SoftmaxWithBias(gof.Op):
double sum = 0.0;
bool discount_max = false;
// Per-row pointers: x_i and sm_i address row i of x and sm through a byte offset of
// i times the axis-0 stride; b_i addresses the start of b.
const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(%(x)s->data + %(x)s->strides[0] * i);
const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(%(b)s->data);
dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + %(sm)s->strides[0] * i);
// PyArray_BYTES yields a char pointer, so the byte stride can be added directly;
// PyArray_DATA yields a void pointer, on which arithmetic is not standard C++.
const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i);
const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(PyArray_DATA(%(b)s));
dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
"""
inside_row_loop = """
npy_intp Sx = %(x)s->strides[1]/sizeof(dtype_%(x)s);
npy_intp Sb = %(b)s->strides[0]/sizeof(dtype_%(b)s);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s);
npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
npy_intp Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s);
npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
size_t row_max_j=0;
dtype_%(sm)s row_max = x_i[0] + b_i[0];
......@@ -263,34 +263,34 @@ class SoftmaxGrad(gof.Op):
dy, sm = inp
dx, = out
return '''
if ((%(dy)s->descr->type_num != PyArray_DOUBLE) &&
(%(dy)s->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(%(dy)s)->type_num != NPY_DOUBLE) &&
(PyArray_DESCR(%(dy)s)->type_num != NPY_FLOAT))
{
PyErr_SetString(PyExc_TypeError,
"types should be float or float64");
%(fail)s;
}
if ((%(sm)s->descr->type_num != PyArray_DOUBLE) &&
(%(sm)s->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(%(sm)s)->type_num != NPY_DOUBLE) &&
(PyArray_DESCR(%(sm)s)->type_num != NPY_FLOAT))
{
PyErr_SetString(PyExc_TypeError,
"types should be float or float64");
%(fail)s;
}
if ((%(dy)s->nd != 2)
|| (%(sm)s->nd != 2))
if ((PyArray_NDIM(%(dy)s) != 2)
|| (PyArray_NDIM(%(sm)s) != 2))
{
PyErr_SetString(PyExc_ValueError, "rank error");
%(fail)s;
}
if (%(dy)s->dimensions[0] != %(sm)s->dimensions[0])
if (PyArray_DIMS(%(dy)s)[0] != PyArray_DIMS(%(sm)s)[0])
{
PyErr_SetString(PyExc_ValueError, "dy.shape[0] != sm.shape[0]");
%(fail)s;
}
if ((NULL == %(dx)s)
|| (%(dx)s->dimensions[0] != %(sm)s->dimensions[0])
|| (%(dx)s->dimensions[1] != %(sm)s->dimensions[1]))
|| (PyArray_DIMS(%(dx)s)[0] != PyArray_DIMS(%(sm)s)[0])
|| (PyArray_DIMS(%(dx)s)[1] != PyArray_DIMS(%(sm)s)[1]))
{
Py_XDECREF(%(dx)s);
%(dx)s = (PyArrayObject*) PyArray_SimpleNew(2,
......@@ -304,22 +304,22 @@ class SoftmaxGrad(gof.Op):
}
}
for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i)
for (size_t i = 0; i < PyArray_DIMS(%(dx)s)[0]; ++i)
{
// Per-row pointers into dy, sm and dx (byte offset of i times the axis-0 stride), plus the
// axis-1 stride of each array converted from bytes to elements.
const dtype_%(dy)s* __restrict__ dy_i = (dtype_%(dy)s*) (%(dy)s->data + %(dy)s->strides[0] * i);
npy_intp Sdy = %(dy)s->strides[1]/sizeof(dtype_%(dy)s);
const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*) (%(sm)s->data + %(sm)s->strides[0] * i);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s);
dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*) (%(dx)s->data + %(dx)s->strides[0] * i);
npy_intp Sdx = %(dx)s->strides[1]/sizeof(dtype_%(dx)s);
// PyArray_BYTES yields a char pointer, so the byte stride can be added directly;
// PyArray_DATA yields a void pointer, on which arithmetic is not standard C++.
const dtype_%(dy)s* __restrict__ dy_i = (dtype_%(dy)s*) (PyArray_BYTES(%(dy)s) + PyArray_STRIDES(%(dy)s)[0] * i);
npy_intp Sdy = PyArray_STRIDES(%(dy)s)[1]/sizeof(dtype_%(dy)s);
const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*) (PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*) (PyArray_BYTES(%(dx)s) + PyArray_STRIDES(%(dx)s)[0] * i);
npy_intp Sdx = PyArray_STRIDES(%(dx)s)[1]/sizeof(dtype_%(dx)s);
double sum_dy_times_sm = 0.;
for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j)
{
dx_i[j * Sdx] = dy_i[j * Sdy] * sm_i[j * Ssm];
sum_dy_times_sm += dx_i[j * Sdx];
}
for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j)
{
dx_i[j * Sdx] -= sum_dy_times_sm * sm_i[j * Ssm];
}
......@@ -773,31 +773,31 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
SoftmaxWithBias.c_code_template()
return (init_decl,
"""
if (%(y_idx)s->nd != 1)
if (PyArray_NDIM(%(y_idx)s) != 1)
{
PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor");
%(fail)s;
}
if ((%(y_idx)s->descr->type_num != PyArray_INT64)
&& (%(y_idx)s->descr->type_num != PyArray_INT32)
&& (%(y_idx)s->descr->type_num != PyArray_INT16)
&& (%(y_idx)s->descr->type_num != PyArray_INT8))
if ((PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT64)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT32)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT16)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT8))
{
PyErr_SetString(PyExc_TypeError,
"y_idx not int8, int16, int32, or int64");
%(fail)s;
}
if (%(x)s->dimensions[0] != %(y_idx)s->dimensions[0])
if (PyArray_DIMS(%(x)s)[0] != PyArray_DIMS(%(y_idx)s)[0])
{
PyErr_Format(PyExc_ValueError,
"number of rows in x (%%ld) does not match length of y (%%ld)",
(long int)%(x)s->dimensions[0],
(long int)%(y_idx)s->dimensions[0]);
(long int)PyArray_DIMS(%(x)s)[0],
(long int)PyArray_DIMS(%(y_idx)s)[0]);
%(fail)s;
}
if ((NULL == %(nll)s) //initial condition
|| (%(nll)s->dimensions[0] != %(y_idx)s->dimensions[0]))
|| (PyArray_DIMS(%(nll)s)[0] != PyArray_DIMS(%(y_idx)s)[0]))
{
if (NULL != %(nll)s) Py_XDECREF(%(nll)s);
%(nll)s = (PyArrayObject*)PyArray_SimpleNew(1,
......@@ -810,7 +810,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
}
}
if ((NULL == %(am)s)
|| (%(am)s->dimensions[0] != %(y_idx)s->dimensions[0]))
|| (PyArray_DIMS(%(am)s)[0] != PyArray_DIMS(%(y_idx)s)[0]))
{
Py_XDECREF(%(am)s);
%(am)s = (PyArrayObject*) PyArray_SimpleNew(1,
......@@ -825,13 +825,13 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
""",
begin_row_loop,
"""
// Element i of y_idx is read by value; nll_i and am_i point at element i of nll and am.
// Each is located through a byte offset of i times the axis-0 stride.
const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
dtype_%(nll) s* __restrict__ nll_i = (dtype_%(nll)s*)(%(nll)s->data + %(nll)s->strides[0] * i);
%(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + %(am)s->strides[0] * i);
// PyArray_BYTES yields a char pointer, so the byte stride can be added directly;
// PyArray_DATA yields a void pointer, on which arithmetic is not standard C++.
const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(PyArray_BYTES(%(y_idx)s) + PyArray_STRIDES(%(y_idx)s)[0] * i))[0];
dtype_%(nll) s* __restrict__ nll_i = (dtype_%(nll)s*)(PyArray_BYTES(%(nll)s) + PyArray_STRIDES(%(nll)s)[0] * i);
%(am_type)s* __restrict__ am_i = (%(am_type)s*) (PyArray_BYTES(%(am)s) + PyArray_STRIDES(%(am)s)[0] * i);
""",
inside_row_loop,
"""
if ((y_i >= %(x)s->dimensions[1]) || (y_i < 0))
if ((y_i >= PyArray_DIMS(%(x)s)[1]) || (y_i < 0))
{
PyErr_SetString(PyExc_ValueError, "y_i value out of bounds");
%(fail)s;
......@@ -914,55 +914,55 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
y_idx_type = node.inputs[2].type.dtype_specs()[1]
return """
if ((%(dnll)s->descr->type_num != PyArray_DOUBLE) &&
(%(dnll)s->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(%(dnll)s)->type_num != NPY_DOUBLE) &&
(PyArray_DESCR(%(dnll)s)->type_num != NPY_FLOAT))
{
PyErr_SetString(PyExc_TypeError,
"dnll type should be float32 or float64");
%(fail)s;
}
if ((%(sm)s->descr->type_num != PyArray_DOUBLE) &&
(%(sm)s->descr->type_num != PyArray_FLOAT))
if ((PyArray_DESCR(%(sm)s)->type_num != NPY_DOUBLE) &&
(PyArray_DESCR(%(sm)s)->type_num != NPY_FLOAT))
{
PyErr_SetString(PyExc_TypeError,
"sm type should be float32 or float64");
%(fail)s;
}
if ((%(y_idx)s->descr->type_num != PyArray_INT64)
&& (%(y_idx)s->descr->type_num != PyArray_INT32)
&& (%(y_idx)s->descr->type_num != PyArray_INT16)
&& (%(y_idx)s->descr->type_num != PyArray_INT8))
if ((PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT64)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT32)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT16)
&& (PyArray_DESCR(%(y_idx)s)->type_num != NPY_INT8))
{
PyErr_SetString(PyExc_TypeError,
"y_idx not int8, int16, int32, or int64");
%(fail)s;
}
if ((%(dnll)s->nd != 1)
|| (%(sm)s->nd != 2)
|| (%(y_idx)s->nd != 1))
if ((PyArray_NDIM(%(dnll)s) != 1)
|| (PyArray_NDIM(%(sm)s) != 2)
|| (PyArray_NDIM(%(y_idx)s) != 1))
{
PyErr_SetString(PyExc_ValueError, "rank error");
%(fail)s;
}
if (%(dnll)s->dimensions[0] != %(sm)s->dimensions[0])
if (PyArray_DIMS(%(dnll)s)[0] != PyArray_DIMS(%(sm)s)[0])
{
PyErr_Format(PyExc_ValueError,
"dnll.shape[0] (%%ld) != sm.shape[0] (%%ld)",
(long int)%(dnll)s->dimensions[0],
(long int)%(sm)s->dimensions[0]);
(long int)PyArray_DIMS(%(dnll)s)[0],
(long int)PyArray_DIMS(%(sm)s)[0]);
%(fail)s;
}
if (%(dnll)s->dimensions[0] != %(y_idx)s->dimensions[0])
if (PyArray_DIMS(%(dnll)s)[0] != PyArray_DIMS(%(y_idx)s)[0])
{
PyErr_Format(PyExc_ValueError,
"dnll.shape[0] (%%ld) != y_idx.shape[0] (%%ld)",
(long int)%(dnll)s->dimensions[0],
(long int)%(y_idx)s->dimensions[0]);
(long int)PyArray_DIMS(%(dnll)s)[0],
(long int)PyArray_DIMS(%(y_idx)s)[0]);
%(fail)s;
}
if ((NULL == %(dx)s)
|| (%(dx)s->dimensions[0] != %(sm)s->dimensions[0])
|| (%(dx)s->dimensions[1] != %(sm)s->dimensions[1]))
|| (PyArray_DIMS(%(dx)s)[0] != PyArray_DIMS(%(sm)s)[0])
|| (PyArray_DIMS(%(dx)s)[1] != PyArray_DIMS(%(sm)s)[1]))
{
if (NULL != %(dx)s) Py_XDECREF(%(dx)s);
%(dx)s = (PyArrayObject*) PyArray_SimpleNew(2,
......@@ -975,23 +975,23 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
}
}
for (size_t i = 0; i < %(dx)s->dimensions[0]; ++i)
for (size_t i = 0; i < PyArray_DIMS(%(dx)s)[0]; ++i)
{
const dtype_%(dnll)s dnll_i = ((dtype_%(dnll)s*)(%(dnll)s->data + %(dnll)s->strides[0] * i))[0];
const dtype_%(dnll)s dnll_i = ((dtype_%(dnll)s*)(PyArray_DATA(%(dnll)s) + PyArray_STRIDES(%(dnll)s)[0] * i))[0];
const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(PyArray_DATA(%(y_idx)s) + PyArray_STRIDES(%(y_idx)s)[0] * i))[0];
const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + %(sm)s->strides[0] * i);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s);
const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_DATA(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*)(%(dx)s->data + %(dx)s->strides[0] * i);
npy_intp Sdx = %(dx)s->strides[1]/sizeof(dtype_%(dx)s);
dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*)(PyArray_DATA(%(dx)s) + PyArray_STRIDES(%(dx)s)[0] * i);
npy_intp Sdx = PyArray_STRIDES(%(dx)s)[1]/sizeof(dtype_%(dx)s);
for (size_t j = 0; j < %(dx)s->dimensions[1]; ++j)
for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j)
{
dx_i[j * Sdx] = dnll_i * sm_i[j * Ssm];
}
if (y_i >= %(dx)s->dimensions[1])
if (y_i >= PyArray_DIMS(%(dx)s)[1])
{
PyErr_SetString(PyExc_ValueError, "y_i >= dx dimensions[1]");
%(fail)s;
......
......@@ -619,15 +619,15 @@ class Shape_i(T.Op):
if isinstance(node.inputs[0].type, T.TensorType):
return """
if(!%(out)s)
%(out)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, PyArray_INT64, 0);
((npy_int64*)PyArray_DATA(%(out)s))[0]=%(x)s->dimensions[%(i)s];
%(out)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(out)s))[0]=PyArray_DIMS(%(x)s)[%(i)s];
""" % locals()
elif node.inputs[0].type.__class__.__name__ == "CudaNdarrayType":
#Don't want to import cuda stuff here.
return """
if(!%(out)s)
%(out)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, PyArray_INT64, 0);
%(out)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(out)s))[0]=
CudaNdarray_HOST_DIMS(%(x)s)[%(i)s];
""" % locals()
......
......@@ -176,13 +176,13 @@ class DownsampleFactorMax(Op):
int x_shp0_usable;
int x_shp1_usable;
int z_shp0, z_shp1;
if(%(x)s->nd!=4)
if(PyArray_NDIM(%(x)s)!=4)
{
PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray");
%(fail)s;
}
z_shp0 = %(x)s->dimensions[2] / %(ds0)s;
z_shp1 = %(x)s->dimensions[3] / %(ds1)s;
z_shp0 = PyArray_DIMS(%(x)s)[2] / %(ds0)s;
z_shp1 = PyArray_DIMS(%(x)s)[3] / %(ds1)s;
if (%(ignore_border)s)
{
x_shp0_usable = z_shp0 * %(ds0)s;
......@@ -190,23 +190,23 @@ class DownsampleFactorMax(Op):
}
else
{
z_shp0 += (%(x)s->dimensions[2] %% %(ds0)s) ? 1 : 0;
z_shp1 += (%(x)s->dimensions[3] %% %(ds1)s) ? 1 : 0;
x_shp0_usable = %(x)s->dimensions[2];
x_shp1_usable = %(x)s->dimensions[3];
z_shp0 += (PyArray_DIMS(%(x)s)[2] %% %(ds0)s) ? 1 : 0;
z_shp1 += (PyArray_DIMS(%(x)s)[3] %% %(ds1)s) ? 1 : 0;
x_shp0_usable = PyArray_DIMS(%(x)s)[2];
x_shp1_usable = PyArray_DIMS(%(x)s)[3];
}
if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(%(z)s->dimensions[0] != %(x)s->dimensions[0])
||(%(z)s->dimensions[1] != %(x)s->dimensions[1])
||(%(z)s->dimensions[2] != z_shp0)
||(%(z)s->dimensions[3] != z_shp1)
||(PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(x)s)[0])
||(PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(x)s)[1])
||(PyArray_DIMS(%(z)s)[2] != z_shp0)
||(PyArray_DIMS(%(z)s)[3] != z_shp1)
)
{
if (%(z)s) Py_XDECREF(%(z)s);
npy_intp dims[4] = {0,0,0,0};
dims[0]=%(x)s->dimensions[0];
dims[1]=%(x)s->dimensions[1];
dims[0]=PyArray_DIMS(%(x)s)[0];
dims[1]=PyArray_DIMS(%(x)s)[1];
dims[2]=z_shp0;
dims[3]=z_shp1;
%(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0); //TODO: zeros not necessary
......@@ -214,8 +214,8 @@ class DownsampleFactorMax(Op):
if (z_shp0 && z_shp1)
{
for(int b=0;b<%(x)s->dimensions[0];b++){
for(int k=0;k<%(x)s->dimensions[1];k++){
for(int b=0;b<PyArray_DIMS(%(x)s)[0];b++){
for(int k=0;k<PyArray_DIMS(%(x)s)[1];k++){
int mini_i = 0;
int zi = 0;
for(int i=0;i< x_shp0_usable; i++){
......@@ -306,23 +306,23 @@ class DownsampleFactorMaxGrad(Op):
PyErr_SetString(PyExc_ValueError, "input types must all match");
%(fail)s;
}
if(%(x)s->nd!=4)
if(PyArray_NDIM(%(x)s)!=4)
{
PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray");
%(fail)s;
}
if(%(z)s->nd!=4)
if(PyArray_NDIM(%(z)s)!=4)
{
PyErr_SetString(PyExc_ValueError, "z must be a 4d ndarray");
%(fail)s;
}
if(%(gz)s->nd!=4)
if(PyArray_NDIM(%(gz)s)!=4)
{
PyErr_SetString(PyExc_ValueError, "gz must be a 4d ndarray");
%(fail)s;
}
z_shp0 = %(z)s->dimensions[2];
z_shp1 = %(z)s->dimensions[3];
z_shp0 = PyArray_DIMS(%(z)s)[2];
z_shp1 = PyArray_DIMS(%(z)s)[3];
if (%(ignore_border)s)
{
x_shp0_usable = z_shp0 * %(ds0)s;
......@@ -330,23 +330,23 @@ class DownsampleFactorMaxGrad(Op):
}
else
{
x_shp0_usable = %(x)s->dimensions[2];
x_shp1_usable = %(x)s->dimensions[3];
x_shp0_usable = PyArray_DIMS(%(x)s)[2];
x_shp1_usable = PyArray_DIMS(%(x)s)[3];
}
if ((!%(gx)s)
|| *PyArray_DIMS(%(gx)s)!=4
||(%(gx)s->dimensions[0] != %(x)s->dimensions[0])
||(%(gx)s->dimensions[1] != %(x)s->dimensions[1])
||(%(gx)s->dimensions[2] != %(x)s->dimensions[2])
||(%(gx)s->dimensions[3] != %(x)s->dimensions[3])
||(PyArray_DIMS(%(gx)s)[0] != PyArray_DIMS(%(x)s)[0])
||(PyArray_DIMS(%(gx)s)[1] != PyArray_DIMS(%(x)s)[1])
||(PyArray_DIMS(%(gx)s)[2] != PyArray_DIMS(%(x)s)[2])
||(PyArray_DIMS(%(gx)s)[3] != PyArray_DIMS(%(x)s)[3])
)
{
Py_XDECREF(%(gx)s);
%(gx)s = (PyArrayObject*) PyArray_ZEROS(4, %(x)s->dimensions, x_typenum,0);
%(gx)s = (PyArrayObject*) PyArray_ZEROS(4, PyArray_DIMS(%(x)s), x_typenum,0);
}
for(int b=0;b<%(x)s->dimensions[0];b++){
for(int k=0;k<%(x)s->dimensions[1];k++){
for(int b=0;b<PyArray_DIMS(%(x)s)[0];b++){
for(int k=0;k<PyArray_DIMS(%(x)s)[1];k++){
int mini_i = 0;
int zi = 0;
for(int i=0;i< x_shp0_usable; i++){
......@@ -364,14 +364,14 @@ class DownsampleFactorMaxGrad(Op):
mini_i = (mini_i + 1 == %(ds0)s) ? 0 : mini_i+1;
zi += (mini_i == 0);
for (int j = x_shp1_usable; j < %(x)s->dimensions[3]; ++j) {
for (int j = x_shp1_usable; j < PyArray_DIMS(%(x)s)[3]; ++j) {
dtype_%(gx)s * gxp = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s,b,k,i,j)));
gxp[0] = 0;
}
}//for i
for(int i = x_shp0_usable; i < %(x)s->dimensions[2]; i++){
for (int j = 0; j < %(x)s->dimensions[3]; ++j) {
for(int i = x_shp0_usable; i < PyArray_DIMS(%(x)s)[2]; i++){
for (int j = 0; j < PyArray_DIMS(%(x)s)[3]; ++j) {
dtype_%(gx)s * gxp = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s,b,k,i,j)));
gxp[0] = 0;
}
......
......@@ -5846,11 +5846,12 @@ class test_arithmetic_cast(unittest.TestCase):
config.int_division == 'floatX'):
assert theano_dtype == config.floatX
continue
numpy_version =numpy.__version__.split('.')[:2]
if (cfg == 'numpy+floatX' and
a_type == 'complex128' and
b_type == 'float32' and
combo == ('scalar', 'array') and
numpy.__version__.startswith('1.6.') and
bool(numpy_version >= [1, 6]) and
theano_dtype == 'complex128' and
numpy_dtypes == ['complex64',
'complex64']):
......@@ -5860,7 +5861,7 @@ class test_arithmetic_cast(unittest.TestCase):
# in progress), so in the meantime we just
# mark this test as a known failure.
raise KnownFailureTest('Known issue with '
'numpy 1.6.x, see #761')
'numpy >= 1.6.x see #761')
# In any other situation: something wrong is
# going on!
......
......@@ -918,7 +918,7 @@ class T_fibby(unittest.TestCase):
return """
Py_XDECREF(%(y)s);
%(y)s = (PyArrayObject*)PyArray_FromArray(
%(x)s, 0, NPY_ENSURECOPY);
%(x)s, 0, NPY_ARRAY_ENSURECOPY);
if (!(%y)s) %(fail)s;
dtype_%(y)s * y = (dtype_%(y)s*)%(y)s->data;
dtype_%(x)s * x = (dtype_%(x)s*)%(x)s->data;
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论