提交 5178e406 authored 作者: Frederic's avatar Frederic

var->strides to PyArray_STRIDES(var) for numpy 1.7

上级 5255f58c
......@@ -4048,7 +4048,7 @@ class Subtensor(Op):
%(x)s->descr,
%(view_ndim)s,
PyArray_DIMS(%(x)s),
%(x)s->strides,
PyArray_STRIDES(%(x)s),
%(x)s->data,
%(x)s->flags,
NULL);
......@@ -4066,13 +4066,13 @@ class Subtensor(Op):
PyArray_NDIM(%(x)s), PyArray_DIMS(xview), PyArray_DIMS(%(x)s));
%(fail)s;
}
if (xview->strides == %(x)s->strides
if (PyArray_STRIDES(xview) == PyArray_STRIDES(%(x)s)
&& (PyArray_DIMS(%(x)s) != NULL))
{
PyErr_Format(PyExc_ValueError, "x and xview"
"(with %%d dims) have the same strides"
" pointers: %%p and %%p",
PyArray_NDIM(%(x)s), xview->strides, %(x)s->strides);
PyArray_NDIM(%(x)s), PyArray_STRIDES(xview), PyArray_STRIDES(%(x)s));
%(fail)s;
}
......@@ -4144,9 +4144,9 @@ class Subtensor(Op):
}
assert (slicelength <= length);
xview->data += %(x)s->strides[outer_ii] * start;
xview->data += PyArray_STRIDES(%(x)s)[outer_ii] * start;
PyArray_DIMS(xview)[inner_ii] = slicelength;
xview->strides[inner_ii] = %(x)s->strides[outer_ii] * step;
PyArray_STRIDES(xview)[inner_ii] = PyArray_STRIDES(%(x)s)[outer_ii] * step;
inner_ii += 1;
spec_pos += 3;
......@@ -4159,7 +4159,7 @@ class Subtensor(Op):
{
if (idx < PyArray_DIMS(%(x)s)[outer_ii])
{
xview->data += %(x)s->strides[outer_ii] * idx;
xview->data += PyArray_STRIDES(%(x)s)[outer_ii] * idx;
}
else
{
......@@ -4181,7 +4181,7 @@ class Subtensor(Op):
{
assert (outer_ii < PyArray_NDIM(%(x)s));
PyArray_DIMS(xview)[inner_ii] = PyArray_DIMS(%(x)s)[outer_ii];
xview->strides[inner_ii] = %(x)s->strides[outer_ii];
PyArray_STRIDES(xview)[inner_ii] = PyArray_STRIDES(%(x)s)[outer_ii];
inner_ii += 1;
outer_ii += 1;
}
......@@ -5385,7 +5385,7 @@ class Reshape(Op):
// -- will err if this will downcast. This could happen if the
// -- user pass an int64 dtype, but npy_intp endup being int32.
new_dims[ii] = ((dtype_%(shp)s*)(
%(shp)s->data + ii * %(shp)s->strides[0]))[0];
%(shp)s->data + ii * PyArray_STRIDES(%(shp)s)[0]))[0];
}
Py_XDECREF(%(z)s);
%(z)s = (PyArrayObject *) PyArray_Newshape(%(x)s, &newshape,
......
......@@ -500,9 +500,9 @@ class GemmRelated(Op):
npy_intp* Ny = PyArray_DIMS(%(_y)s);
npy_intp* Nz = 0; //PyArray_DIMS(%(_zout)s);
npy_intp* Sx = %(_x)s->strides;
npy_intp* Sy = %(_y)s->strides;
npy_intp* Sz = 0; //%(_zout)s->strides;
npy_intp* Sx = PyArray_STRIDES(%(_x)s);
npy_intp* Sy = PyArray_STRIDES(%(_y)s);
npy_intp* Sz = 0; //PyArray_STRIDES(%(_zout)s);
//strides for x, y, z in dimensions 0, 1
int sx_0, sx_1, sy_0, sy_1, sz_0, sz_1;
......@@ -597,7 +597,7 @@ class GemmRelated(Op):
%(fail)s
Py_XDECREF(%(_x)s);
%(_x)s = _x_copy;
Sx = %(_x)s->strides;
Sx = PyArray_STRIDES(%(_x)s);
}
if ((Sy[0] < 1) || (Sy[1] < 1) || (Sy[0] MOD type_size) || (Sy[1] MOD type_size)
......@@ -608,7 +608,7 @@ class GemmRelated(Op):
%(fail)s
Py_XDECREF(%(_y)s);
%(_y)s = _y_copy;
Sy = %(_y)s->strides;
Sy = PyArray_STRIDES(%(_y)s);
}
if ((Sz[0] < 1) || (Sz[1] < 1) || (Sz[0] MOD type_size) || (Sz[1] MOD type_size)
......@@ -619,7 +619,7 @@ class GemmRelated(Op):
%(fail)s
Py_XDECREF(%(_zout)s);
%(_zout)s = _z_copy;
Sz = %(_zout)s->strides;
Sz = PyArray_STRIDES(%(_zout)s);
}
"""
......@@ -889,19 +889,19 @@ class Gemm(GemmRelated):
Py_INCREF(%(_zout)s);
}
Nz = PyArray_DIMS(%(_z)s);
Sz = %(_z)s->strides;
Sz = PyArray_STRIDES(%(_z)s);
"""
setup_z_Nz_Sz_outplace = """
if ((NULL == %(_zout)s)
|| (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_z)s)[0])
|| (PyArray_DIMS(%(_zout)s)[1] != PyArray_DIMS(%(_z)s)[1])
|| (%(_zout)s->strides[0] <= 0)
|| (%(_zout)s->strides[1] <= 0)
|| (%(_zout)s->strides[0] MOD type_size)
|| (%(_zout)s->strides[1] MOD type_size)
|| ((%(_zout)s->strides[0] != type_size)
&& (%(_zout)s->strides[1] != type_size)))
|| (PyArray_STRIDES(%(_zout)s)[0] <= 0)
|| (PyArray_STRIDES(%(_zout)s)[1] <= 0)
|| (PyArray_STRIDES(%(_zout)s)[0] MOD type_size)
|| (PyArray_STRIDES(%(_zout)s)[1] MOD type_size)
|| ((PyArray_STRIDES(%(_zout)s)[0] != type_size)
&& (PyArray_STRIDES(%(_zout)s)[1] != type_size)))
{
Py_XDECREF(%(_zout)s);
npy_intp dims[2];
......@@ -917,7 +917,7 @@ class Gemm(GemmRelated):
}
}
Nz = PyArray_DIMS(%(_zout)s);
Sz = %(_zout)s->strides;
Sz = PyArray_STRIDES(%(_zout)s);
if (%(_zout)s->descr->type_num == NPY_FLOAT)
{
......@@ -925,8 +925,8 @@ class Gemm(GemmRelated):
int zoi = Sz[0] / sizeof(float);
int zoj = Sz[1] / sizeof(float);
const float * zdata = (float*)%(_z)s->data;
int zi = %(_z)s->strides[0]/sizeof(float);
int zj = %(_z)s->strides[1]/sizeof(float);
int zi = PyArray_STRIDES(%(_z)s)[0]/sizeof(float);
int zj = PyArray_STRIDES(%(_z)s)[1]/sizeof(float);
for (int i = 0; i < Nz[0]; ++i)
{
for (int j = 0; j < Nz[1]; ++j)
......@@ -941,8 +941,8 @@ class Gemm(GemmRelated):
int zoi = Sz[0] / sizeof(double);
int zoj = Sz[1] / sizeof(double);
const double * zdata = (double*)%(_z)s->data;
int zi = %(_z)s->strides[0]/sizeof(double);
int zj = %(_z)s->strides[1]/sizeof(double);
int zi = PyArray_STRIDES(%(_z)s)[0]/sizeof(double);
int zj = PyArray_STRIDES(%(_z)s)[1]/sizeof(double);
for (int i = 0; i < Nz[0]; ++i)
{
for (int j = 0; j < Nz[1]; ++j)
......@@ -1496,7 +1496,7 @@ class Dot22(GemmRelated):
}
}
Nz = PyArray_DIMS(%(_zout)s);
Sz = %(_zout)s->strides;
Sz = PyArray_STRIDES(%(_zout)s);
"""
check_ab_double_or_float = ""
......
......@@ -70,10 +70,10 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
// copy A if !self.destructive or A is fully strided
if (!%(destructive)s
|| (%(A)s->strides[0] < 0)
|| (%(A)s->strides[1] < 0)
|| ((%(A)s->strides[0] != elemsize)
&& (%(A)s->strides[1] != elemsize)))
|| (PyArray_STRIDES(%(A)s)[0] < 0)
|| (PyArray_STRIDES(%(A)s)[1] < 0)
|| ((PyArray_STRIDES(%(A)s)[0] != elemsize)
&& (PyArray_STRIDES(%(A)s)[1] != elemsize)))
{
npy_intp dims[2];
dims[0] = PyArray_DIMS(%(A)s)[0];
......@@ -82,10 +82,10 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
if ((NULL == %(Z)s)
|| (PyArray_DIMS(%(Z)s)[0] != PyArray_DIMS(%(A)s)[0])
|| (PyArray_DIMS(%(Z)s)[1] != PyArray_DIMS(%(A)s)[1])
|| (%(Z)s->strides[0] < 0)
|| (%(Z)s->strides[1] < 0)
|| ((%(Z)s->strides[0] != elemsize)
&& (%(Z)s->strides[1] != elemsize)))
|| (PyArray_STRIDES(%(Z)s)[0] < 0)
|| (PyArray_STRIDES(%(Z)s)[1] < 0)
|| ((PyArray_STRIDES(%(Z)s)[0] != elemsize)
&& (PyArray_STRIDES(%(Z)s)[1] != elemsize)))
{
if (%(Z)s) Py_XDECREF(%(Z)s);
%(Z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims,
......@@ -105,10 +105,10 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
{
float * zoutdata = (float*)%(Z)s->data;
const float * zdata = (float*)%(A)s->data;
int Ai = %(A)s->strides[0]/sizeof(float);
int Aj = %(A)s->strides[1]/sizeof(float);
int Zi = %(Z)s->strides[0]/sizeof(float);
int Zj = %(Z)s->strides[1]/sizeof(float);
int Ai = PyArray_STRIDES(%(A)s)[0]/sizeof(float);
int Aj = PyArray_STRIDES(%(A)s)[1]/sizeof(float);
int Zi = PyArray_STRIDES(%(Z)s)[0]/sizeof(float);
int Zj = PyArray_STRIDES(%(Z)s)[1]/sizeof(float);
for (int i = 0; i < dims[0]; ++i)
{
for (int j = 0; j < dims[1]; ++j)
......@@ -121,10 +121,10 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
{
double * zoutdata = (double*) %(Z)s->data;
const double * zdata = (double*)%(A)s->data;
int Ai = %(A)s->strides[0]/sizeof(double);
int Aj = %(A)s->strides[1]/sizeof(double);
int Zi = %(Z)s->strides[0]/sizeof(double);
int Zj = %(Z)s->strides[1]/sizeof(double);
int Ai = PyArray_STRIDES(%(A)s)[0]/sizeof(double);
int Aj = PyArray_STRIDES(%(A)s)[1]/sizeof(double);
int Zi = PyArray_STRIDES(%(Z)s)[0]/sizeof(double);
int Zj = PyArray_STRIDES(%(Z)s)[1]/sizeof(double);
for (int i = 0; i < dims[0]; ++i)
{
for (int j = 0; j < dims[1]; ++j)
......@@ -154,8 +154,8 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
{
int Nz0 = PyArray_DIMS(%(Z)s)[0];
int Nz1 = PyArray_DIMS(%(Z)s)[1];
int Sx = %(x)s->strides[0] / elemsize;
int Sy = %(y)s->strides[0] / elemsize;
int Sx = PyArray_STRIDES(%(x)s)[0] / elemsize;
int Sy = PyArray_STRIDES(%(y)s)[0] / elemsize;
/* create appropriate strides for Z, if it is a row or column matrix.
* In that case, the value of the stride does not really matter, but
......@@ -163,8 +163,8 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
* - they are not smaller than the number of elements in the array,
* - they are not 0.
*/
int Sz0 = (Nz0 > 1) ? (%(Z)s->strides[0] / elemsize) : (Nz1 + 1);
int Sz1 = (Nz1 > 1) ? (%(Z)s->strides[1] / elemsize) : (Nz0 + 1);
int Sz0 = (Nz0 > 1) ? (PyArray_STRIDES(%(Z)s)[0] / elemsize) : (Nz1 + 1);
int Sz1 = (Nz1 > 1) ? (PyArray_STRIDES(%(Z)s)[1] / elemsize) : (Nz0 + 1);
dtype_%(x)s* x_data = (dtype_%(x)s*) %(x)s->data;
dtype_%(y)s* y_data = (dtype_%(y)s*) %(y)s->data;
......@@ -176,7 +176,7 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
if (Sy < 0)
y_data += (Nz1 - 1) * Sy;
if (%(Z)s->strides[0] == elemsize)
if (PyArray_STRIDES(%(Z)s)[0] == elemsize)
{
if (%(Z)s->descr->type_num == NPY_FLOAT)
{
......@@ -201,7 +201,7 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
%(fail)s
}
}
else if (%(Z)s->strides[1] == elemsize)
else if (PyArray_STRIDES(%(Z)s)[1] == elemsize)
{
if (%(Z)s->descr->type_num == NPY_FLOAT)
{
......@@ -369,8 +369,8 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
{
float * zoutdata = (float*)%(zz)s->data;
const float * zdata = (float*)%(aa)s->data;
int Ai = %(aa)s->strides[0]/sizeof(float);
int Zi = %(zz)s->strides[0]/sizeof(float);
int Ai = PyArray_STRIDES(%(aa)s)[0]/sizeof(float);
int Zi = PyArray_STRIDES(%(zz)s)[0]/sizeof(float);
for (int i = 0; i < PyArray_DIMS(%(aa)s)[0]; ++i)
{
zoutdata[Zi*i] = fbeta * zdata[Ai*i];
......@@ -380,8 +380,8 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
{
double * zoutdata = (double*) %(zz)s->data;
const double * zdata = (double*)%(aa)s->data;
int Ai = %(aa)s->strides[0]/sizeof(double);
int Zi = %(zz)s->strides[0]/sizeof(double);
int Ai = PyArray_STRIDES(%(aa)s)[0]/sizeof(double);
int Zi = PyArray_STRIDES(%(zz)s)[0]/sizeof(double);
for (int i = 0; i < PyArray_DIMS(%(aa)s)[0]; ++i)
{
zoutdata[Zi*i] = dbeta * zdata[Ai*i];
......@@ -416,10 +416,10 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
* - are not smaller than the number of elements in the array
* - are not 0.
*/
int Sx0 = (Nx0 > 1) ? (%(xx)s->strides[0] / elemsize) : (Nx1 + 1);
int Sx1 = (Nx1 > 1) ? (%(xx)s->strides[1] / elemsize) : (Nx0 + 1);
int Sz = %(zz)s->strides[0] / elemsize;
int Sy = %(yy)s->strides[0] / elemsize;
int Sx0 = (Nx0 > 1) ? (PyArray_STRIDES(%(xx)s)[0] / elemsize) : (Nx1 + 1);
int Sx1 = (Nx1 > 1) ? (PyArray_STRIDES(%(xx)s)[1] / elemsize) : (Nx0 + 1);
int Sz = PyArray_STRIDES(%(zz)s)[0] / elemsize;
int Sy = PyArray_STRIDES(%(yy)s)[0] / elemsize;
dtype_%(yy)s* yy_data = (dtype_%(yy)s*) %(yy)s->data;
dtype_%(zz)s* zz_data = (dtype_%(zz)s*) %(zz)s->data;
......@@ -439,10 +439,10 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
// gemv on reversed matrix and vectors
// - if the copy is too long, maybe call vector/vector dot on
// each row instead
if ((%(xx)s->strides[0] < 0)
|| (%(xx)s->strides[1] < 0)
|| ((%(xx)s->strides[0] != elemsize)
&& (%(xx)s->strides[1] != elemsize)))
if ((PyArray_STRIDES(%(xx)s)[0] < 0)
|| (PyArray_STRIDES(%(xx)s)[1] < 0)
|| ((PyArray_STRIDES(%(xx)s)[0] != elemsize)
&& (PyArray_STRIDES(%(xx)s)[1] != elemsize)))
{
npy_intp dims[2];
dims[0] = Nx0;
......@@ -454,11 +454,11 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
%(fail)s
Py_XDECREF(%(xx)s);
%(xx)s = xx_copy;
Sx0 = (Nx0 > 1) ? (%(xx)s->strides[0] / elemsize) : (Nx1 + 1);
Sx1 = (Nx1 > 1) ? (%(xx)s->strides[1] / elemsize) : (Nx0 + 1);
Sx0 = (Nx0 > 1) ? (PyArray_STRIDES(%(xx)s)[0] / elemsize) : (Nx1 + 1);
Sx1 = (Nx1 > 1) ? (PyArray_STRIDES(%(xx)s)[1] / elemsize) : (Nx0 + 1);
}
if (%(xx)s->strides[0] == elemsize)
if (PyArray_STRIDES(%(xx)s)[0] == elemsize)
{
if (%(xx)s->descr->type_num == NPY_FLOAT)
{
......@@ -488,7 +488,7 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
%(fail)s
}
}
else if (%(xx)s->strides[1] == elemsize)
else if (PyArray_STRIDES(%(xx)s)[1] == elemsize)
{
if (%(xx)s->descr->type_num == NPY_FLOAT)
{
......
......@@ -801,9 +801,9 @@ def ____gemm_code(check_ab, a_init, b_init):
npy_intp* Ny = PyArray_DIMS(_y);
npy_intp* Nz = PyArray_DIMS(_z);
npy_intp* Sx = _x->strides;
npy_intp* Sy = _y->strides;
npy_intp* Sz = _z->strides;
npy_intp* Sx = PyArray_STRIDES(_x);
npy_intp* Sy = PyArray_STRIDES(_y);
npy_intp* Sz = PyArray_STRIDES(_z);
size_t sx_0, sx_1, sy_0, sy_1, sz_0, sz_1;
......
......@@ -298,7 +298,7 @@ class DimShuffle(Op):
for i, o in enumerate(self.new_order):
if o != 'x':
strides_statements += [('strides[' + str(i)
+ '] = %(basename)s->strides[' + str(o) + ']')]
+ '] = PyArray_STRIDES(%(basename)s)[' + str(o) + ']')]
else:
strides_statements += [('strides[' + str(i) + '] = 0')]
......
......@@ -68,7 +68,7 @@ def make_checks(loop_orders, dtypes, sub):
jump = "(%s) - (%s)" % ("%(var)s_stride%(index)s" % locals(), adjust)
init += """
%(var)s_n%(index)s = PyArray_DIMS(%(var)s)[%(index)s];
%(var)s_stride%(index)s = %(var)s->strides[%(index)s] / sizeof(%(dtype)s);
%(var)s_stride%(index)s = PyArray_STRIDES(%(var)s)[%(index)s] / sizeof(%(dtype)s);
%(var)s_jump%(index)s_%(j)s = %(jump)s;
//printf("%(var)s_jump%(index)s_%(j)s is:");
//std::cout << %(var)s_jump%(index)s_%(j)s << std::endl;
......@@ -263,7 +263,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub):
for i, index in enumerate(init_loop_orders[olv_index]):
if index != 'x':
order_loops += """
%(ovar)s_loops_it->first = abs(%(ovar)s->strides[%(index)i]);
%(ovar)s_loops_it->first = abs(PyArray_STRIDES(%(ovar)s)[%(index)i]);
""" % locals()
else:
# Stride is 0 when dimension is broadcastable
......
......@@ -308,17 +308,17 @@ class Conv3D(theano.Op):
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) )
const int ws0 = %(W)s->strides[0];
const int ws1 = %(W)s->strides[1];
const int ws2 = %(W)s->strides[2];
const int vs1 = %(V)s->strides[1];
const int ws4 = %(W)s->strides[4];
const int vs4 = %(V)s->strides[4];
const int ws3 = %(W)s->strides[3];
const int vs3 = %(V)s->strides[3];
const int vs2 = %(V)s->strides[2];
const int bs = %(b)s->strides[0];
const int hs4 = %(H)s->strides[4];
const int ws0 = PyArray_STRIDES(%(W)s)[0];
const int ws1 = PyArray_STRIDES(%(W)s)[1];
const int ws2 = PyArray_STRIDES(%(W)s)[2];
const int vs1 = PyArray_STRIDES(%(V)s)[1];
const int ws4 = PyArray_STRIDES(%(W)s)[4];
const int vs4 = PyArray_STRIDES(%(V)s)[4];
const int ws3 = PyArray_STRIDES(%(W)s)[3];
const int vs3 = PyArray_STRIDES(%(V)s)[3];
const int vs2 = PyArray_STRIDES(%(V)s)[2];
const int bs = PyArray_STRIDES(%(b)s)[0];
const int hs4 = PyArray_STRIDES(%(H)s)[4];
......@@ -424,20 +424,20 @@ class Conv3D(theano.Op):
Wpos = Wposl + ws2;
Vpos = Vposl + vs2;
} //close l
Wpos = Wposk + %(W)s->strides[1];
Vpos = Vposk + %(V)s->strides[1];
Wpos = Wposk + PyArray_STRIDES(%(W)s)[1];
Vpos = Vposk + PyArray_STRIDES(%(V)s)[1];
} //close k
Hpos = Hpost + %(H)s->strides[3];
Hpos = Hpost + PyArray_STRIDES(%(H)s)[3];
Vpos = Vpost + vs3 * dt;
} //close t
Hpos = Hposc + %(H)s->strides[2];
Hpos = Hposc + PyArray_STRIDES(%(H)s)[2];
Vpos = Vposc + vs2 * dc;
} //close c
Hpos = Hposr + %(H)s->strides[1];
Vpos = Vposr + %(V)s->strides[1] * dr;
Hpos = Hposr + PyArray_STRIDES(%(H)s)[1];
Vpos = Vposr + PyArray_STRIDES(%(V)s)[1] * dr;
} //closes r
Hpos = Hposi + %(H)s->strides[0];
Vpos = Vposi + %(V)s->strides[0];
Hpos = Hposi + PyArray_STRIDES(%(H)s)[0];
Vpos = Vposi + PyArray_STRIDES(%(V)s)[0];
} //closes i
......@@ -515,8 +515,8 @@ class Conv3D(theano.Op):
Wpos = Wposl + ws2;
Vpos = Vposl + vs2;
} //close l
Wpos = Wposk + %(W)s->strides[1];
Vpos = Vposk + %(V)s->strides[1];
Wpos = Wposk + PyArray_STRIDES(%(W)s)[1];
Vpos = Vposk + PyArray_STRIDES(%(V)s)[1];
} //close k
......@@ -527,17 +527,17 @@ class Conv3D(theano.Op):
//std::cout << "incremented Wpos by " << ws0 << std::endl;
//std::cout << "incremented Hpos by " << hs4 << std::endl;
} //close j
Hpos = Hpost + %(H)s->strides[3];
Hpos = Hpost + PyArray_STRIDES(%(H)s)[3];
Vpos = Vpost + vs3 * dt;
} //close t
Hpos = Hposc + %(H)s->strides[2];
Hpos = Hposc + PyArray_STRIDES(%(H)s)[2];
Vpos = Vposc + vs2 * dc;
} //close c
Hpos = Hposr + %(H)s->strides[1];
Vpos = Vposr + %(V)s->strides[1] * dr;
Hpos = Hposr + PyArray_STRIDES(%(H)s)[1];
Vpos = Vposr + PyArray_STRIDES(%(V)s)[1] * dr;
} //closes r
Hpos = Hposi + %(H)s->strides[0];
Vpos = Vposi + %(V)s->strides[0];
Hpos = Hposi + PyArray_STRIDES(%(H)s)[0];
Vpos = Vposi + PyArray_STRIDES(%(V)s)[0];
} //closes i
} //closes general case code
}}}}}}} //extra scope so error handler jumps don't cross declarations
......
......@@ -226,12 +226,12 @@ class ConvGrad3D(theano.Op):
}
{ //extra scope so fail works
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( x->data + (i)*x->strides[0]+(j)*x->strides[1]+(k)*x->strides[2]+(l)*x->strides[3]+(m)*x->strides[4] )
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( x->data + (i)*PyArray_STRIDES(x)[0]+(j)*PyArray_STRIDES(x)[1]+(k)*PyArray_STRIDES(x)[2]+(l)*PyArray_STRIDES(x)[3]+(m)*PyArray_STRIDES(x)[4] )
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) )
const int dhs3 = %(dCdH)s->strides[3];
const int dtvs3 = dt * %(V)s->strides[3];
const int dhs3 = PyArray_STRIDES(%(dCdH)s)[3];
const int dtvs3 = dt * PyArray_STRIDES(%(V)s)[3];
// Compute dCdW
//TODO-- see if this can be made faster by using ELEM_AT instead of ELEM5
......@@ -249,8 +249,8 @@ class ConvGrad3D(theano.Op):
for (int i = 0; i < batchSize; i++) {
for (int p = 0; p < outputHeight; p++) {
for (int q = 0; q < outputWidth; q++) {
int Hpos = i * %(dCdH)s->strides[0] + j * %(dCdH)s->strides[4] + p * %(dCdH)s->strides[1] + q * %(dCdH)s->strides[2] ;
int Vpos = i * %(V)s->strides[0] + z * %(V)s->strides[4] + (dr * p+k) * %(V)s->strides[1] + (dc*q+l) * %(V)s->strides[2] + m * %(V)s->strides[3];
int Hpos = i * PyArray_STRIDES(%(dCdH)s)[0] + j * PyArray_STRIDES(%(dCdH)s)[4] + p * PyArray_STRIDES(%(dCdH)s)[1] + q * PyArray_STRIDES(%(dCdH)s)[2] ;
int Vpos = i * PyArray_STRIDES(%(V)s)[0] + z * PyArray_STRIDES(%(V)s)[4] + (dr * p+k) * PyArray_STRIDES(%(V)s)[1] + (dc*q+l) * PyArray_STRIDES(%(V)s)[2] + m * PyArray_STRIDES(%(V)s)[3];
for (int r = 0; r < outputDur; r++) {
writePos += ELEM5(%(dCdH)s,i,p,q,r,j) * ELEM5(%(V)s,i,dr*p+k,dc*q+l,dt*r+m,z);
......
......@@ -237,17 +237,17 @@ class ConvTransp3D(theano.Op):
{ // for fail 6
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( x->data + (i)*x->strides[0]+(j)*x->strides[1]+(k)*x->strides[2]+(l)*x->strides[3]+(m)*x->strides[4] )
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( x->data + (i)*PyArray_STRIDES(x)[0]+(j)*PyArray_STRIDES(x)[1]+(k)*PyArray_STRIDES(x)[2]+(l)*PyArray_STRIDES(x)[3]+(m)*PyArray_STRIDES(x)[4] )
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) )
dtype_%(b)s * b = (dtype_%(b)s *) %(b)s->data;
int rs4 = %(R)s->strides[4];
int ws0 = %(W)s->strides[0];
int ws4 = %(W)s->strides[4];
int hs4 = %(H)s->strides[4];
int rs4 = PyArray_STRIDES(%(R)s)[4];
int ws0 = PyArray_STRIDES(%(W)s)[0];
int ws4 = PyArray_STRIDES(%(W)s)[4];
int hs4 = PyArray_STRIDES(%(H)s)[4];
// Compute R
// R[i,r,c,t,j] = b_j + sum_{rc,rk | d \circ rc + rk = r} sum_{cc,ck | ...} sum_{tc,tk | ...} sum_k W[k, rk, ck, tk,j] * H[i,rc,cc,tc,k]
......@@ -260,7 +260,7 @@ class ConvTransp3D(theano.Op):
for (int t = 0; t < videoDur; t++) {
const int ftc = (int)std::max(0.0f, ceilf(float(t-filterDur +1) /float(dt)));
long long Rpost = i * %(R)s->strides[0] + r * %(R)s->strides[1] + c * %(R)s->strides[2] + t * %(R)s->strides[3];
long long Rpost = i * PyArray_STRIDES(%(R)s)[0] + r * PyArray_STRIDES(%(R)s)[1] + c * PyArray_STRIDES(%(R)s)[2] + t * PyArray_STRIDES(%(R)s)[3];
long long Rpos = Rpost;
for (int j = 0; j < inputChannels; j++)
......@@ -284,8 +284,8 @@ class ConvTransp3D(theano.Op):
const int tk = t - tc * dt;
if (tk < 0) break;
int Wpos = rk * %(W)s->strides[1] + ck * %(W)s->strides[2] + tk * %(W)s->strides[3];
int Hpostc = i * %(H)s->strides[0] + rc * %(H)s->strides[1] + cc * %(H)s->strides[2] + tc * %(H)s->strides[3];
int Wpos = rk * PyArray_STRIDES(%(W)s)[1] + ck * PyArray_STRIDES(%(W)s)[2] + tk * PyArray_STRIDES(%(W)s)[3];
int Hpostc = i * PyArray_STRIDES(%(H)s)[0] + rc * PyArray_STRIDES(%(H)s)[1] + cc * PyArray_STRIDES(%(H)s)[2] + tc * PyArray_STRIDES(%(H)s)[3];
Rpos = Rpost;
for (int j = 0; j < inputChannels; j++)
{
......
......@@ -970,7 +970,7 @@ class ConvOp(OpenMPOp):
def c_support_code(self):
return """
#define STRIDES(arr) ((arr)->strides)
#define STRIDES(arr) (PyArray_STRIDES(arr))
#define FULL 2
#define SAME 1
#define VALID 0
......@@ -1242,7 +1242,7 @@ _conv_op_code_a = """
const int mode=%(mode)s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL;
PyArrayObject *filtersflipped_arr=NULL, *img2d_arr=NULL;
PyArrayObject *filtersflipped_arr=NULL, *img2d_arr=NULL, *z_arr=NULL;
const %(type)s fill_value = 0;
int type_im=PyArray_TYPE(%(img2d)s);
......@@ -1305,12 +1305,12 @@ if(PyArray_NDIM(%(filtersflipped)s)==3){
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (img2d_arr->strides[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
if ((PyArray_STRIDES(img2d_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s;
}
......@@ -1319,17 +1319,17 @@ img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, NPY_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != PyArray_DIMS(filtersflipped_arr)[3]*(npy_intp)sizeof(%(type)s))){
if ((PyArray_STRIDES(filtersflipped_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(filtersflipped_arr)[2] != PyArray_DIMS(filtersflipped_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if (!PyArray_ISCONTIGUOUS(filtersflipped_arr)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)s;
}
}
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError,
......@@ -1364,36 +1364,37 @@ if ((!%(z)s)
}else{
//PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
z_arr = (PyArrayObject*) %(z)s;
int Os[2];
Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s;
//assertions
if (%(z)s->strides[0] != PyArray_DIMS(%(z)s)[1] *
if (PyArray_STRIDES(%(z)s)[0] != PyArray_DIMS(%(z)s)[1] *
PyArray_DIMS(%(z)s)[2] *
PyArray_DIMS(%(z)s)[3] *
(npy_intp)sizeof(%(type)s))
%(fail)s;
if (%(z)s->strides[1] != PyArray_DIMS(%(z)s)[2] *
if (PyArray_STRIDES(%(z)s)[1] != PyArray_DIMS(%(z)s)[2] *
PyArray_DIMS(%(z)s)[3] *
(npy_intp)sizeof(%(type)s))
%(fail)s;
if (%(z)s->strides[2] != PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s))
if (PyArray_STRIDES(%(z)s)[2] != PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s))
%(fail)s;
if (%(z)s->strides[3] != (npy_intp)sizeof(%(type)s))
if (PyArray_STRIDES(%(z)s)[3] != (npy_intp)sizeof(%(type)s))
%(fail)s;
for(int b=0;b< %(self_bsize)s;b++){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){
%(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(%(z)s,b,n_kern));
%(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(z_arr,b,n_kern));
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0;
for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d,b,stack_size));
const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped,n_kern,stack_size));
const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d_arr,b,stack_size));
const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped_arr,n_kern,stack_size));
for (int iter_m=0; iter_m < Os[0]; iter_m++) {
......@@ -1514,7 +1515,7 @@ Py_XDECREF(filtersflipped);
_conv_op_code_valid_gemm = """
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *img2d_arr=NULL;
PyArrayObject *ain1=NULL, *ain2=NULL, *img2d_arr=NULL, *z_arr=NULL;
const int NKERN = %(self_nkern)s;
int type_im=PyArray_TYPE(%(img2d)s);
......@@ -1578,12 +1579,12 @@ if (NKERN != kerns_dim[0])
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (img2d_arr->strides[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
if ((PyArray_STRIDES(img2d_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s;
}
......@@ -1617,6 +1618,7 @@ if ((!%(z)s)
}else{
PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
z_arr = (PyArrayObject*) %(z)s;
%(assert_size)s
......@@ -1672,7 +1674,7 @@ for(int b=0;b< %(self_bsize)s;b++){
int imgview_stride = dim_im[1];
int filter_rows_stride =kerns_dim[1]*kerns_dim[2]*kerns_dim[3];
//remember, Fortran wants a column-major interpretation
assert(img2d->strides[3] == (npy_intp)sizeof(%(type)s));
assert(PyArray_STRIDES(img2d)[3] == (npy_intp)sizeof(%(type)s));
if (0){
std::cerr << "b " << b << " img_col " << img_col << " filterrow " << filter_row << " stackidx " <<stackidx << "\\n";
......@@ -1766,7 +1768,7 @@ def gen_conv_code_unroll_batch_kern(d, unroll_bsize=1, unroll_ksize=1):
ret = """
const int mode=%(mode)s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL, *z_arr=NULL;;
const %(type)s fill_value = 0;
int type_im=PyArray_TYPE(%(img2d)s);
......@@ -1827,12 +1829,12 @@ if(PyArray_NDIM(%(filtersflipped)s)==3){
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (img2d_arr->strides[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
if ((PyArray_STRIDES(img2d_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s;
}
......@@ -1841,17 +1843,17 @@ img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, NPY_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != (npy_intp)sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != PyArray_DIMS(filtersflipped_arr)[3]*(npy_intp)sizeof(%(type)s))){
if ((PyArray_STRIDES(filtersflipped_arr)[3] != (npy_intp)sizeof(%(type)s))
|| (PyArray_STRIDES(filtersflipped_arr)[2] != PyArray_DIMS(filtersflipped_arr)[3]*(npy_intp)sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if (!PyArray_ISCONTIGUOUS(filtersflipped_arr)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)s;
}
}
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s;
......@@ -1881,28 +1883,29 @@ if ((!%(z)s)
}else{
//PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
z_arr = (PyArrayObject*) %(z)s;
int Os[2];
Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s;
//assertions
if (%(z)s->strides[0] != PyArray_DIMS(%(z)s)[1] *PyArray_DIMS(%(z)s)[2] *PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[1] != PyArray_DIMS(%(z)s)[2] * PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[2] != PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[3] != (npy_intp)sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[0] != PyArray_DIMS(%(z)s)[1] *PyArray_DIMS(%(z)s)[2] *PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[1] != PyArray_DIMS(%(z)s)[2] * PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[2] != PyArray_DIMS(%(z)s)[3] * (npy_intp)sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[3] != (npy_intp)sizeof(%(type)s)) %(fail)s;
for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){
""" % d
ret += my_dup2("%(type)s * __restrict__ out%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(%(z)s,b+%(unroll_biter)s,n_kern+%(unroll_kiter)s));")
ret += my_dup2("%(type)s * __restrict__ out%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(z_arr,b+%(unroll_biter)s,n_kern+%(unroll_kiter)s));")
ret += my_dup("for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out%(unroll_iter)s[i] = 0;", unroll_bsize * unroll_ksize)
ret += """
for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
""" % d
ret += my_dup("const %(type)s * __restrict__ in%(unroll_iter)d=(%(type)s *)(PyArray_GETPTR2(img2d,b+%(unroll_iter)s,stack_size));", unroll_bsize)
ret += my_dup("const %(type)s * __restrict__ hvals%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(filtersflipped,n_kern+%(unroll_iter)s,stack_size));", unroll_ksize)
ret += my_dup("const %(type)s * __restrict__ in%(unroll_iter)d=(%(type)s *)(PyArray_GETPTR2(img2d_arr,b+%(unroll_iter)s,stack_size));", unroll_bsize)
ret += my_dup("const %(type)s * __restrict__ hvals%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(filtersflipped_arr,n_kern+%(unroll_iter)s,stack_size));", unroll_ksize)
ret += """
int new_m;
......@@ -1999,7 +2002,7 @@ Py_XDECREF(filtersflipped);
_conv_op_code_unroll_patch = """
const int mode=%(mode)s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL, *z_arr=NULL;
const %(type)s fill_value = 0;//only value of 0 are currently tested and correctly implemented
int type_im=PyArray_TYPE(%(img2d)s);
......@@ -2062,12 +2065,12 @@ if(PyArray_NDIM(%(filtersflipped)s)==3){
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, NPY_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(%(type)s))
|| (img2d_arr->strides[2] != PyArray_DIMS(img2d_arr)[3]*sizeof(%(type)s))){
if ((PyArray_STRIDES(img2d_arr)[3] != sizeof(%(type)s))
|| (PyArray_STRIDES(img2d_arr)[2] != PyArray_DIMS(img2d_arr)[3]*sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
if (!PyArray_ISCONTIGUOUS(img2d_arr)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s;
}
......@@ -2076,17 +2079,17 @@ img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, NPY_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != PyArray_DIMS(filtersflipped_arr)[3]*sizeof(%(type)s))){
if ((PyArray_STRIDES(filtersflipped_arr)[3] != sizeof(%(type)s))
|| (PyArray_STRIDES(filtersflipped_arr)[2] != PyArray_DIMS(filtersflipped_arr)[3]*sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if (!PyArray_ISCONTIGUOUS(filtersflipped_arr)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)s;
}
}
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s;
......@@ -2124,12 +2127,13 @@ if ((!%(z)s)
}else{
//PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
z_arr = (PyArrayObject*) %(z)s;
//assertions
if (%(z)s->strides[0] != PyArray_DIMS(%(z)s)[1] *PyArray_DIMS(%(z)s)[2] *PyArray_DIMS(%(z)s)[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[1] != PyArray_DIMS(%(z)s)[2] * PyArray_DIMS(%(z)s)[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[2] != PyArray_DIMS(%(z)s)[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[3] != sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[0] != PyArray_DIMS(%(z)s)[1] *PyArray_DIMS(%(z)s)[2] *PyArray_DIMS(%(z)s)[3] * sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[1] != PyArray_DIMS(%(z)s)[2] * PyArray_DIMS(%(z)s)[3] * sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[2] != PyArray_DIMS(%(z)s)[3] * sizeof(%(type)s)) %(fail)s;
if (PyArray_STRIDES(%(z)s)[3] != sizeof(%(type)s)) %(fail)s;
//The if on the number of loop make a speed up for small array.
//with g++ 4.5.1. The compiler should be smart enough to do this himself!
......@@ -2144,13 +2148,13 @@ for(int batch_kern_idx=0;
int b = batch_kern_idx / %(self_nkern)s;
int n_kern = batch_kern_idx %% %(self_nkern)s;
%(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(%(z)s,b,n_kern));
%(type)s * __restrict__ out=(%(type)s *)(PyArray_GETPTR2(z_arr,b,n_kern));
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out[i] = 0;
for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d,b,stack_size));
const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped,n_kern,stack_size));
const %(type)s * __restrict__ in=(%(type)s *)(PyArray_GETPTR2(img2d_arr,b,stack_size));
const %(type)s * __restrict__ hvals=(%(type)s *)(PyArray_GETPTR2(filtersflipped_arr,n_kern,stack_size));
int new_m;
......
......@@ -159,15 +159,15 @@ class SoftmaxWithBias(gof.Op):
double sum = 0.0;
bool discount_max = false;
const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(%(x)s->data + %(x)s->strides[0] * i);
const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(%(x)s->data + PyArray_STRIDES(%(x)s)[0] * i);
const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(%(b)s->data);
dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + %(sm)s->strides[0] * i);
dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + PyArray_STRIDES(%(sm)s)[0] * i);
"""
inside_row_loop = """
npy_intp Sx = %(x)s->strides[1]/sizeof(dtype_%(x)s);
npy_intp Sb = %(b)s->strides[0]/sizeof(dtype_%(b)s);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s);
npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
npy_intp Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s);
npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
size_t row_max_j=0;
dtype_%(sm)s row_max = x_i[0] + b_i[0];
......@@ -306,12 +306,12 @@ class SoftmaxGrad(gof.Op):
for (size_t i = 0; i < PyArray_DIMS(%(dx)s)[0]; ++i)
{
const dtype_%(dy)s* __restrict__ dy_i = (dtype_%(dy)s*) (%(dy)s->data + %(dy)s->strides[0] * i);
npy_intp Sdy = %(dy)s->strides[1]/sizeof(dtype_%(dy)s);
const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*) (%(sm)s->data + %(sm)s->strides[0] * i);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s);
dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*) (%(dx)s->data + %(dx)s->strides[0] * i);
npy_intp Sdx = %(dx)s->strides[1]/sizeof(dtype_%(dx)s);
const dtype_%(dy)s* __restrict__ dy_i = (dtype_%(dy)s*) (%(dy)s->data + PyArray_STRIDES(%(dy)s)[0] * i);
npy_intp Sdy = PyArray_STRIDES(%(dy)s)[1]/sizeof(dtype_%(dy)s);
const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*) (%(sm)s->data + PyArray_STRIDES(%(sm)s)[0] * i);
npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*) (%(dx)s->data + PyArray_STRIDES(%(dx)s)[0] * i);
npy_intp Sdx = PyArray_STRIDES(%(dx)s)[1]/sizeof(dtype_%(dx)s);
double sum_dy_times_sm = 0.;
for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j)
......@@ -825,9 +825,9 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
""",
begin_row_loop,
"""
const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
dtype_%(nll) s* __restrict__ nll_i = (dtype_%(nll)s*)(%(nll)s->data + %(nll)s->strides[0] * i);
%(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + %(am)s->strides[0] * i);
const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + PyArray_STRIDES(%(y_idx)s)[0] * i))[0];
dtype_%(nll) s* __restrict__ nll_i = (dtype_%(nll)s*)(%(nll)s->data + PyArray_STRIDES(%(nll)s)[0] * i);
%(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + PyArray_STRIDES(%(am)s)[0] * i);
""",
inside_row_loop,
"""
......@@ -977,15 +977,15 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
for (size_t i = 0; i < PyArray_DIMS(%(dx)s)[0]; ++i)
{
const dtype_%(dnll)s dnll_i = ((dtype_%(dnll)s*)(%(dnll)s->data + %(dnll)s->strides[0] * i))[0];
const dtype_%(dnll)s dnll_i = ((dtype_%(dnll)s*)(%(dnll)s->data + PyArray_STRIDES(%(dnll)s)[0] * i))[0];
const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + PyArray_STRIDES(%(y_idx)s)[0] * i))[0];
const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + %(sm)s->strides[0] * i);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s);
const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + PyArray_STRIDES(%(sm)s)[0] * i);
npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*)(%(dx)s->data + %(dx)s->strides[0] * i);
npy_intp Sdx = %(dx)s->strides[1]/sizeof(dtype_%(dx)s);
dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*)(%(dx)s->data + PyArray_STRIDES(%(dx)s)[0] * i);
npy_intp Sdx = PyArray_STRIDES(%(dx)s)[1]/sizeof(dtype_%(dx)s);
for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j)
{
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论