提交 5178e406 作者: Frederic

Replace direct var->strides access with PyArray_STRIDES(var) for NumPy 1.7 compatibility

上级 5255f58c
......@@ -4048,7 +4048,7 @@ class Subtensor(Op):
%(x)s->descr,
%(view_ndim)s,
PyArray_DIMS(%(x)s),
%(x)s->strides,
PyArray_STRIDES(%(x)s),
%(x)s->data,
%(x)s->flags,
NULL);
......@@ -4066,13 +4066,13 @@ class Subtensor(Op):
PyArray_NDIM(%(x)s), PyArray_DIMS(xview), PyArray_DIMS(%(x)s));
%(fail)s;
}
if (xview->strides == %(x)s->strides
if (PyArray_STRIDES(xview) == PyArray_STRIDES(%(x)s)
&& (PyArray_DIMS(%(x)s) != NULL))
{
PyErr_Format(PyExc_ValueError, "x and xview"
"(with %%d dims) have the same strides"
" pointers: %%p and %%p",
PyArray_NDIM(%(x)s), xview->strides, %(x)s->strides);
PyArray_NDIM(%(x)s), PyArray_STRIDES(xview), PyArray_STRIDES(%(x)s));
%(fail)s;
}
......@@ -4144,9 +4144,9 @@ class Subtensor(Op):
}
assert (slicelength <= length);
xview->data += %(x)s->strides[outer_ii] * start;
xview->data += PyArray_STRIDES(%(x)s)[outer_ii] * start;
PyArray_DIMS(xview)[inner_ii] = slicelength;
xview->strides[inner_ii] = %(x)s->strides[outer_ii] * step;
PyArray_STRIDES(xview)[inner_ii] = PyArray_STRIDES(%(x)s)[outer_ii] * step;
inner_ii += 1;
spec_pos += 3;
......@@ -4159,7 +4159,7 @@ class Subtensor(Op):
{
if (idx < PyArray_DIMS(%(x)s)[outer_ii])
{
xview->data += %(x)s->strides[outer_ii] * idx;
xview->data += PyArray_STRIDES(%(x)s)[outer_ii] * idx;
}
else
{
......@@ -4181,7 +4181,7 @@ class Subtensor(Op):
{
assert (outer_ii < PyArray_NDIM(%(x)s));
PyArray_DIMS(xview)[inner_ii] = PyArray_DIMS(%(x)s)[outer_ii];
xview->strides[inner_ii] = %(x)s->strides[outer_ii];
PyArray_STRIDES(xview)[inner_ii] = PyArray_STRIDES(%(x)s)[outer_ii];
inner_ii += 1;
outer_ii += 1;
}
......@@ -5385,7 +5385,7 @@ class Reshape(Op):
// -- will err if this will downcast. This could happen if the
// -- user pass an int64 dtype, but npy_intp endup being int32.
new_dims[ii] = ((dtype_%(shp)s*)(
%(shp)s->data + ii * %(shp)s->strides[0]))[0];
%(shp)s->data + ii * PyArray_STRIDES(%(shp)s)[0]))[0];
}
Py_XDECREF(%(z)s);
%(z)s = (PyArrayObject *) PyArray_Newshape(%(x)s, &newshape,
......
......@@ -500,9 +500,9 @@ class GemmRelated(Op):
npy_intp* Ny = PyArray_DIMS(%(_y)s);
npy_intp* Nz = 0; //PyArray_DIMS(%(_zout)s);
npy_intp* Sx = %(_x)s->strides;
npy_intp* Sy = %(_y)s->strides;
npy_intp* Sz = 0; //%(_zout)s->strides;
npy_intp* Sx = PyArray_STRIDES(%(_x)s);
npy_intp* Sy = PyArray_STRIDES(%(_y)s);
npy_intp* Sz = 0; //PyArray_STRIDES(%(_zout)s);
//strides for x, y, z in dimensions 0, 1
int sx_0, sx_1, sy_0, sy_1, sz_0, sz_1;
......@@ -597,7 +597,7 @@ class GemmRelated(Op):
%(fail)s
Py_XDECREF(%(_x)s);
%(_x)s = _x_copy;
Sx = %(_x)s->strides;
Sx = PyArray_STRIDES(%(_x)s);
}
if ((Sy[0] < 1) || (Sy[1] < 1) || (Sy[0] MOD type_size) || (Sy[1] MOD type_size)
......@@ -608,7 +608,7 @@ class GemmRelated(Op):
%(fail)s
Py_XDECREF(%(_y)s);
%(_y)s = _y_copy;
Sy = %(_y)s->strides;
Sy = PyArray_STRIDES(%(_y)s);
}
if ((Sz[0] < 1) || (Sz[1] < 1) || (Sz[0] MOD type_size) || (Sz[1] MOD type_size)
......@@ -619,7 +619,7 @@ class GemmRelated(Op):
%(fail)s
Py_XDECREF(%(_zout)s);
%(_zout)s = _z_copy;
Sz = %(_zout)s->strides;
Sz = PyArray_STRIDES(%(_zout)s);
}
"""
......@@ -889,19 +889,19 @@ class Gemm(GemmRelated):
Py_INCREF(%(_zout)s);
}
Nz = PyArray_DIMS(%(_z)s);
Sz = %(_z)s->strides;
Sz = PyArray_STRIDES(%(_z)s);
"""
setup_z_Nz_Sz_outplace = """
if ((NULL == %(_zout)s)
|| (PyArray_DIMS(%(_zout)s)[0] != PyArray_DIMS(%(_z)s)[0])
|| (PyArray_DIMS(%(_zout)s)[1] != PyArray_DIMS(%(_z)s)[1])
|| (%(_zout)s->strides[0] <= 0)
|| (%(_zout)s->strides[1] <= 0)
|| (%(_zout)s->strides[0] MOD type_size)
|| (%(_zout)s->strides[1] MOD type_size)
|| ((%(_zout)s->strides[0] != type_size)
&& (%(_zout)s->strides[1] != type_size)))
|| (PyArray_STRIDES(%(_zout)s)[0] <= 0)
|| (PyArray_STRIDES(%(_zout)s)[1] <= 0)
|| (PyArray_STRIDES(%(_zout)s)[0] MOD type_size)
|| (PyArray_STRIDES(%(_zout)s)[1] MOD type_size)
|| ((PyArray_STRIDES(%(_zout)s)[0] != type_size)
&& (PyArray_STRIDES(%(_zout)s)[1] != type_size)))
{
Py_XDECREF(%(_zout)s);
npy_intp dims[2];
......@@ -917,7 +917,7 @@ class Gemm(GemmRelated):
}
}
Nz = PyArray_DIMS(%(_zout)s);
Sz = %(_zout)s->strides;
Sz = PyArray_STRIDES(%(_zout)s);
if (%(_zout)s->descr->type_num == NPY_FLOAT)
{
......@@ -925,8 +925,8 @@ class Gemm(GemmRelated):
int zoi = Sz[0] / sizeof(float);
int zoj = Sz[1] / sizeof(float);
const float * zdata = (float*)%(_z)s->data;
int zi = %(_z)s->strides[0]/sizeof(float);
int zj = %(_z)s->strides[1]/sizeof(float);
int zi = PyArray_STRIDES(%(_z)s)[0]/sizeof(float);
int zj = PyArray_STRIDES(%(_z)s)[1]/sizeof(float);
for (int i = 0; i < Nz[0]; ++i)
{
for (int j = 0; j < Nz[1]; ++j)
......@@ -941,8 +941,8 @@ class Gemm(GemmRelated):
int zoi = Sz[0] / sizeof(double);
int zoj = Sz[1] / sizeof(double);
const double * zdata = (double*)%(_z)s->data;
int zi = %(_z)s->strides[0]/sizeof(double);
int zj = %(_z)s->strides[1]/sizeof(double);
int zi = PyArray_STRIDES(%(_z)s)[0]/sizeof(double);
int zj = PyArray_STRIDES(%(_z)s)[1]/sizeof(double);
for (int i = 0; i < Nz[0]; ++i)
{
for (int j = 0; j < Nz[1]; ++j)
......@@ -1496,7 +1496,7 @@ class Dot22(GemmRelated):
}
}
Nz = PyArray_DIMS(%(_zout)s);
Sz = %(_zout)s->strides;
Sz = PyArray_STRIDES(%(_zout)s);
"""
check_ab_double_or_float = ""
......
......@@ -70,10 +70,10 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
// copy A if !self.destructive or A is fully strided
if (!%(destructive)s
|| (%(A)s->strides[0] < 0)
|| (%(A)s->strides[1] < 0)
|| ((%(A)s->strides[0] != elemsize)
&& (%(A)s->strides[1] != elemsize)))
|| (PyArray_STRIDES(%(A)s)[0] < 0)
|| (PyArray_STRIDES(%(A)s)[1] < 0)
|| ((PyArray_STRIDES(%(A)s)[0] != elemsize)
&& (PyArray_STRIDES(%(A)s)[1] != elemsize)))
{
npy_intp dims[2];
dims[0] = PyArray_DIMS(%(A)s)[0];
......@@ -82,10 +82,10 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
if ((NULL == %(Z)s)
|| (PyArray_DIMS(%(Z)s)[0] != PyArray_DIMS(%(A)s)[0])
|| (PyArray_DIMS(%(Z)s)[1] != PyArray_DIMS(%(A)s)[1])
|| (%(Z)s->strides[0] < 0)
|| (%(Z)s->strides[1] < 0)
|| ((%(Z)s->strides[0] != elemsize)
&& (%(Z)s->strides[1] != elemsize)))
|| (PyArray_STRIDES(%(Z)s)[0] < 0)
|| (PyArray_STRIDES(%(Z)s)[1] < 0)
|| ((PyArray_STRIDES(%(Z)s)[0] != elemsize)
&& (PyArray_STRIDES(%(Z)s)[1] != elemsize)))
{
if (%(Z)s) Py_XDECREF(%(Z)s);
%(Z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims,
......@@ -105,10 +105,10 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
{
float * zoutdata = (float*)%(Z)s->data;
const float * zdata = (float*)%(A)s->data;
int Ai = %(A)s->strides[0]/sizeof(float);
int Aj = %(A)s->strides[1]/sizeof(float);
int Zi = %(Z)s->strides[0]/sizeof(float);
int Zj = %(Z)s->strides[1]/sizeof(float);
int Ai = PyArray_STRIDES(%(A)s)[0]/sizeof(float);
int Aj = PyArray_STRIDES(%(A)s)[1]/sizeof(float);
int Zi = PyArray_STRIDES(%(Z)s)[0]/sizeof(float);
int Zj = PyArray_STRIDES(%(Z)s)[1]/sizeof(float);
for (int i = 0; i < dims[0]; ++i)
{
for (int j = 0; j < dims[1]; ++j)
......@@ -121,10 +121,10 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
{
double * zoutdata = (double*) %(Z)s->data;
const double * zdata = (double*)%(A)s->data;
int Ai = %(A)s->strides[0]/sizeof(double);
int Aj = %(A)s->strides[1]/sizeof(double);
int Zi = %(Z)s->strides[0]/sizeof(double);
int Zj = %(Z)s->strides[1]/sizeof(double);
int Ai = PyArray_STRIDES(%(A)s)[0]/sizeof(double);
int Aj = PyArray_STRIDES(%(A)s)[1]/sizeof(double);
int Zi = PyArray_STRIDES(%(Z)s)[0]/sizeof(double);
int Zj = PyArray_STRIDES(%(Z)s)[1]/sizeof(double);
for (int i = 0; i < dims[0]; ++i)
{
for (int j = 0; j < dims[1]; ++j)
......@@ -154,8 +154,8 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
{
int Nz0 = PyArray_DIMS(%(Z)s)[0];
int Nz1 = PyArray_DIMS(%(Z)s)[1];
int Sx = %(x)s->strides[0] / elemsize;
int Sy = %(y)s->strides[0] / elemsize;
int Sx = PyArray_STRIDES(%(x)s)[0] / elemsize;
int Sy = PyArray_STRIDES(%(y)s)[0] / elemsize;
/* create appropriate strides for Z, if it is a row or column matrix.
* In that case, the value of the stride does not really matter, but
......@@ -163,8 +163,8 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
* - they are not smaller than the number of elements in the array,
* - they are not 0.
*/
int Sz0 = (Nz0 > 1) ? (%(Z)s->strides[0] / elemsize) : (Nz1 + 1);
int Sz1 = (Nz1 > 1) ? (%(Z)s->strides[1] / elemsize) : (Nz0 + 1);
int Sz0 = (Nz0 > 1) ? (PyArray_STRIDES(%(Z)s)[0] / elemsize) : (Nz1 + 1);
int Sz1 = (Nz1 > 1) ? (PyArray_STRIDES(%(Z)s)[1] / elemsize) : (Nz0 + 1);
dtype_%(x)s* x_data = (dtype_%(x)s*) %(x)s->data;
dtype_%(y)s* y_data = (dtype_%(y)s*) %(y)s->data;
......@@ -176,7 +176,7 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
if (Sy < 0)
y_data += (Nz1 - 1) * Sy;
if (%(Z)s->strides[0] == elemsize)
if (PyArray_STRIDES(%(Z)s)[0] == elemsize)
{
if (%(Z)s->descr->type_num == NPY_FLOAT)
{
......@@ -201,7 +201,7 @@ def ger_c_code(A, a, x, y, Z, destructive, fail):
%(fail)s
}
}
else if (%(Z)s->strides[1] == elemsize)
else if (PyArray_STRIDES(%(Z)s)[1] == elemsize)
{
if (%(Z)s->descr->type_num == NPY_FLOAT)
{
......@@ -369,8 +369,8 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
{
float * zoutdata = (float*)%(zz)s->data;
const float * zdata = (float*)%(aa)s->data;
int Ai = %(aa)s->strides[0]/sizeof(float);
int Zi = %(zz)s->strides[0]/sizeof(float);
int Ai = PyArray_STRIDES(%(aa)s)[0]/sizeof(float);
int Zi = PyArray_STRIDES(%(zz)s)[0]/sizeof(float);
for (int i = 0; i < PyArray_DIMS(%(aa)s)[0]; ++i)
{
zoutdata[Zi*i] = fbeta * zdata[Ai*i];
......@@ -380,8 +380,8 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
{
double * zoutdata = (double*) %(zz)s->data;
const double * zdata = (double*)%(aa)s->data;
int Ai = %(aa)s->strides[0]/sizeof(double);
int Zi = %(zz)s->strides[0]/sizeof(double);
int Ai = PyArray_STRIDES(%(aa)s)[0]/sizeof(double);
int Zi = PyArray_STRIDES(%(zz)s)[0]/sizeof(double);
for (int i = 0; i < PyArray_DIMS(%(aa)s)[0]; ++i)
{
zoutdata[Zi*i] = dbeta * zdata[Ai*i];
......@@ -416,10 +416,10 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
* - are not smaller than the number of elements in the array
* - are not 0.
*/
int Sx0 = (Nx0 > 1) ? (%(xx)s->strides[0] / elemsize) : (Nx1 + 1);
int Sx1 = (Nx1 > 1) ? (%(xx)s->strides[1] / elemsize) : (Nx0 + 1);
int Sz = %(zz)s->strides[0] / elemsize;
int Sy = %(yy)s->strides[0] / elemsize;
int Sx0 = (Nx0 > 1) ? (PyArray_STRIDES(%(xx)s)[0] / elemsize) : (Nx1 + 1);
int Sx1 = (Nx1 > 1) ? (PyArray_STRIDES(%(xx)s)[1] / elemsize) : (Nx0 + 1);
int Sz = PyArray_STRIDES(%(zz)s)[0] / elemsize;
int Sy = PyArray_STRIDES(%(yy)s)[0] / elemsize;
dtype_%(yy)s* yy_data = (dtype_%(yy)s*) %(yy)s->data;
dtype_%(zz)s* zz_data = (dtype_%(zz)s*) %(zz)s->data;
......@@ -439,10 +439,10 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
// gemv on reversed matrix and vectors
// - if the copy is too long, maybe call vector/vector dot on
// each row instead
if ((%(xx)s->strides[0] < 0)
|| (%(xx)s->strides[1] < 0)
|| ((%(xx)s->strides[0] != elemsize)
&& (%(xx)s->strides[1] != elemsize)))
if ((PyArray_STRIDES(%(xx)s)[0] < 0)
|| (PyArray_STRIDES(%(xx)s)[1] < 0)
|| ((PyArray_STRIDES(%(xx)s)[0] != elemsize)
&& (PyArray_STRIDES(%(xx)s)[1] != elemsize)))
{
npy_intp dims[2];
dims[0] = Nx0;
......@@ -454,11 +454,11 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
%(fail)s
Py_XDECREF(%(xx)s);
%(xx)s = xx_copy;
Sx0 = (Nx0 > 1) ? (%(xx)s->strides[0] / elemsize) : (Nx1 + 1);
Sx1 = (Nx1 > 1) ? (%(xx)s->strides[1] / elemsize) : (Nx0 + 1);
Sx0 = (Nx0 > 1) ? (PyArray_STRIDES(%(xx)s)[0] / elemsize) : (Nx1 + 1);
Sx1 = (Nx1 > 1) ? (PyArray_STRIDES(%(xx)s)[1] / elemsize) : (Nx0 + 1);
}
if (%(xx)s->strides[0] == elemsize)
if (PyArray_STRIDES(%(xx)s)[0] == elemsize)
{
if (%(xx)s->descr->type_num == NPY_FLOAT)
{
......@@ -488,7 +488,7 @@ def gemv_c_code(aa, xx, yy, zz, alpha, beta, destructive, fail):
%(fail)s
}
}
else if (%(xx)s->strides[1] == elemsize)
else if (PyArray_STRIDES(%(xx)s)[1] == elemsize)
{
if (%(xx)s->descr->type_num == NPY_FLOAT)
{
......
......@@ -801,9 +801,9 @@ def ____gemm_code(check_ab, a_init, b_init):
npy_intp* Ny = PyArray_DIMS(_y);
npy_intp* Nz = PyArray_DIMS(_z);
npy_intp* Sx = _x->strides;
npy_intp* Sy = _y->strides;
npy_intp* Sz = _z->strides;
npy_intp* Sx = PyArray_STRIDES(_x);
npy_intp* Sy = PyArray_STRIDES(_y);
npy_intp* Sz = PyArray_STRIDES(_z);
size_t sx_0, sx_1, sy_0, sy_1, sz_0, sz_1;
......
......@@ -298,7 +298,7 @@ class DimShuffle(Op):
for i, o in enumerate(self.new_order):
if o != 'x':
strides_statements += [('strides[' + str(i)
+ '] = %(basename)s->strides[' + str(o) + ']')]
+ '] = PyArray_STRIDES(%(basename)s)[' + str(o) + ']')]
else:
strides_statements += [('strides[' + str(i) + '] = 0')]
......
......@@ -68,7 +68,7 @@ def make_checks(loop_orders, dtypes, sub):
jump = "(%s) - (%s)" % ("%(var)s_stride%(index)s" % locals(), adjust)
init += """
%(var)s_n%(index)s = PyArray_DIMS(%(var)s)[%(index)s];
%(var)s_stride%(index)s = %(var)s->strides[%(index)s] / sizeof(%(dtype)s);
%(var)s_stride%(index)s = PyArray_STRIDES(%(var)s)[%(index)s] / sizeof(%(dtype)s);
%(var)s_jump%(index)s_%(j)s = %(jump)s;
//printf("%(var)s_jump%(index)s_%(j)s is:");
//std::cout << %(var)s_jump%(index)s_%(j)s << std::endl;
......@@ -263,7 +263,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub):
for i, index in enumerate(init_loop_orders[olv_index]):
if index != 'x':
order_loops += """
%(ovar)s_loops_it->first = abs(%(ovar)s->strides[%(index)i]);
%(ovar)s_loops_it->first = abs(PyArray_STRIDES(%(ovar)s)[%(index)i]);
""" % locals()
else:
# Stride is 0 when dimension is broadcastable
......
......@@ -308,17 +308,17 @@ class Conv3D(theano.Op):
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) )
const int ws0 = %(W)s->strides[0];
const int ws1 = %(W)s->strides[1];
const int ws2 = %(W)s->strides[2];
const int vs1 = %(V)s->strides[1];
const int ws4 = %(W)s->strides[4];
const int vs4 = %(V)s->strides[4];
const int ws3 = %(W)s->strides[3];
const int vs3 = %(V)s->strides[3];
const int vs2 = %(V)s->strides[2];
const int bs = %(b)s->strides[0];
const int hs4 = %(H)s->strides[4];
const int ws0 = PyArray_STRIDES(%(W)s)[0];
const int ws1 = PyArray_STRIDES(%(W)s)[1];
const int ws2 = PyArray_STRIDES(%(W)s)[2];
const int vs1 = PyArray_STRIDES(%(V)s)[1];
const int ws4 = PyArray_STRIDES(%(W)s)[4];
const int vs4 = PyArray_STRIDES(%(V)s)[4];
const int ws3 = PyArray_STRIDES(%(W)s)[3];
const int vs3 = PyArray_STRIDES(%(V)s)[3];
const int vs2 = PyArray_STRIDES(%(V)s)[2];
const int bs = PyArray_STRIDES(%(b)s)[0];
const int hs4 = PyArray_STRIDES(%(H)s)[4];
......@@ -424,20 +424,20 @@ class Conv3D(theano.Op):
Wpos = Wposl + ws2;
Vpos = Vposl + vs2;
} //close l
Wpos = Wposk + %(W)s->strides[1];
Vpos = Vposk + %(V)s->strides[1];
Wpos = Wposk + PyArray_STRIDES(%(W)s)[1];
Vpos = Vposk + PyArray_STRIDES(%(V)s)[1];
} //close k
Hpos = Hpost + %(H)s->strides[3];
Hpos = Hpost + PyArray_STRIDES(%(H)s)[3];
Vpos = Vpost + vs3 * dt;
} //close t
Hpos = Hposc + %(H)s->strides[2];
Hpos = Hposc + PyArray_STRIDES(%(H)s)[2];
Vpos = Vposc + vs2 * dc;
} //close c
Hpos = Hposr + %(H)s->strides[1];
Vpos = Vposr + %(V)s->strides[1] * dr;
Hpos = Hposr + PyArray_STRIDES(%(H)s)[1];
Vpos = Vposr + PyArray_STRIDES(%(V)s)[1] * dr;
} //closes r
Hpos = Hposi + %(H)s->strides[0];
Vpos = Vposi + %(V)s->strides[0];
Hpos = Hposi + PyArray_STRIDES(%(H)s)[0];
Vpos = Vposi + PyArray_STRIDES(%(V)s)[0];
} //closes i
......@@ -515,8 +515,8 @@ class Conv3D(theano.Op):
Wpos = Wposl + ws2;
Vpos = Vposl + vs2;
} //close l
Wpos = Wposk + %(W)s->strides[1];
Vpos = Vposk + %(V)s->strides[1];
Wpos = Wposk + PyArray_STRIDES(%(W)s)[1];
Vpos = Vposk + PyArray_STRIDES(%(V)s)[1];
} //close k
......@@ -527,17 +527,17 @@ class Conv3D(theano.Op):
//std::cout << "incremented Wpos by " << ws0 << std::endl;
//std::cout << "incremented Hpos by " << hs4 << std::endl;
} //close j
Hpos = Hpost + %(H)s->strides[3];
Hpos = Hpost + PyArray_STRIDES(%(H)s)[3];
Vpos = Vpost + vs3 * dt;
} //close t
Hpos = Hposc + %(H)s->strides[2];
Hpos = Hposc + PyArray_STRIDES(%(H)s)[2];
Vpos = Vposc + vs2 * dc;
} //close c
Hpos = Hposr + %(H)s->strides[1];
Vpos = Vposr + %(V)s->strides[1] * dr;
Hpos = Hposr + PyArray_STRIDES(%(H)s)[1];
Vpos = Vposr + PyArray_STRIDES(%(V)s)[1] * dr;
} //closes r
Hpos = Hposi + %(H)s->strides[0];
Vpos = Vposi + %(V)s->strides[0];
Hpos = Hposi + PyArray_STRIDES(%(H)s)[0];
Vpos = Vposi + PyArray_STRIDES(%(V)s)[0];
} //closes i
} //closes general case code
}}}}}}} //extra scope so error handler jumps don't cross declarations
......
......@@ -226,12 +226,12 @@ class ConvGrad3D(theano.Op):
}
{ //extra scope so fail works
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( x->data + (i)*x->strides[0]+(j)*x->strides[1]+(k)*x->strides[2]+(l)*x->strides[3]+(m)*x->strides[4] )
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( x->data + (i)*PyArray_STRIDES(x)[0]+(j)*PyArray_STRIDES(x)[1]+(k)*PyArray_STRIDES(x)[2]+(l)*PyArray_STRIDES(x)[3]+(m)*PyArray_STRIDES(x)[4] )
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) )
const int dhs3 = %(dCdH)s->strides[3];
const int dtvs3 = dt * %(V)s->strides[3];
const int dhs3 = PyArray_STRIDES(%(dCdH)s)[3];
const int dtvs3 = dt * PyArray_STRIDES(%(V)s)[3];
// Compute dCdW
//TODO-- see if this can be made faster by using ELEM_AT instead of ELEM5
......@@ -249,8 +249,8 @@ class ConvGrad3D(theano.Op):
for (int i = 0; i < batchSize; i++) {
for (int p = 0; p < outputHeight; p++) {
for (int q = 0; q < outputWidth; q++) {
int Hpos = i * %(dCdH)s->strides[0] + j * %(dCdH)s->strides[4] + p * %(dCdH)s->strides[1] + q * %(dCdH)s->strides[2] ;
int Vpos = i * %(V)s->strides[0] + z * %(V)s->strides[4] + (dr * p+k) * %(V)s->strides[1] + (dc*q+l) * %(V)s->strides[2] + m * %(V)s->strides[3];
int Hpos = i * PyArray_STRIDES(%(dCdH)s)[0] + j * PyArray_STRIDES(%(dCdH)s)[4] + p * PyArray_STRIDES(%(dCdH)s)[1] + q * PyArray_STRIDES(%(dCdH)s)[2] ;
int Vpos = i * PyArray_STRIDES(%(V)s)[0] + z * PyArray_STRIDES(%(V)s)[4] + (dr * p+k) * PyArray_STRIDES(%(V)s)[1] + (dc*q+l) * PyArray_STRIDES(%(V)s)[2] + m * PyArray_STRIDES(%(V)s)[3];
for (int r = 0; r < outputDur; r++) {
writePos += ELEM5(%(dCdH)s,i,p,q,r,j) * ELEM5(%(V)s,i,dr*p+k,dc*q+l,dt*r+m,z);
......
......@@ -237,17 +237,17 @@ class ConvTransp3D(theano.Op):
{ // for fail 6
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( x->data + (i)*x->strides[0]+(j)*x->strides[1]+(k)*x->strides[2]+(l)*x->strides[3]+(m)*x->strides[4] )
#define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( x->data + (i)*PyArray_STRIDES(x)[0]+(j)*PyArray_STRIDES(x)[1]+(k)*PyArray_STRIDES(x)[2]+(l)*PyArray_STRIDES(x)[3]+(m)*PyArray_STRIDES(x)[4] )
#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) )
dtype_%(b)s * b = (dtype_%(b)s *) %(b)s->data;
int rs4 = %(R)s->strides[4];
int ws0 = %(W)s->strides[0];
int ws4 = %(W)s->strides[4];
int hs4 = %(H)s->strides[4];
int rs4 = PyArray_STRIDES(%(R)s)[4];
int ws0 = PyArray_STRIDES(%(W)s)[0];
int ws4 = PyArray_STRIDES(%(W)s)[4];
int hs4 = PyArray_STRIDES(%(H)s)[4];
// Compute R
// R[i,r,c,t,j] = b_j + sum_{rc,rk | d \circ rc + rk = r} sum_{cc,ck | ...} sum_{tc,tk | ...} sum_k W[k, rk, ck, tk,j] * H[i,rc,cc,tc,k]
......@@ -260,7 +260,7 @@ class ConvTransp3D(theano.Op):
for (int t = 0; t < videoDur; t++) {
const int ftc = (int)std::max(0.0f, ceilf(float(t-filterDur +1) /float(dt)));
long long Rpost = i * %(R)s->strides[0] + r * %(R)s->strides[1] + c * %(R)s->strides[2] + t * %(R)s->strides[3];
long long Rpost = i * PyArray_STRIDES(%(R)s)[0] + r * PyArray_STRIDES(%(R)s)[1] + c * PyArray_STRIDES(%(R)s)[2] + t * PyArray_STRIDES(%(R)s)[3];
long long Rpos = Rpost;
for (int j = 0; j < inputChannels; j++)
......@@ -284,8 +284,8 @@ class ConvTransp3D(theano.Op):
const int tk = t - tc * dt;
if (tk < 0) break;
int Wpos = rk * %(W)s->strides[1] + ck * %(W)s->strides[2] + tk * %(W)s->strides[3];
int Hpostc = i * %(H)s->strides[0] + rc * %(H)s->strides[1] + cc * %(H)s->strides[2] + tc * %(H)s->strides[3];
int Wpos = rk * PyArray_STRIDES(%(W)s)[1] + ck * PyArray_STRIDES(%(W)s)[2] + tk * PyArray_STRIDES(%(W)s)[3];
int Hpostc = i * PyArray_STRIDES(%(H)s)[0] + rc * PyArray_STRIDES(%(H)s)[1] + cc * PyArray_STRIDES(%(H)s)[2] + tc * PyArray_STRIDES(%(H)s)[3];
Rpos = Rpost;
for (int j = 0; j < inputChannels; j++)
{
......
差异被折叠。
......@@ -159,15 +159,15 @@ class SoftmaxWithBias(gof.Op):
double sum = 0.0;
bool discount_max = false;
const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(%(x)s->data + %(x)s->strides[0] * i);
const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(%(x)s->data + PyArray_STRIDES(%(x)s)[0] * i);
const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(%(b)s->data);
dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + %(sm)s->strides[0] * i);
dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + PyArray_STRIDES(%(sm)s)[0] * i);
"""
inside_row_loop = """
npy_intp Sx = %(x)s->strides[1]/sizeof(dtype_%(x)s);
npy_intp Sb = %(b)s->strides[0]/sizeof(dtype_%(b)s);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s);
npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
npy_intp Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s);
npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
size_t row_max_j=0;
dtype_%(sm)s row_max = x_i[0] + b_i[0];
......@@ -306,12 +306,12 @@ class SoftmaxGrad(gof.Op):
for (size_t i = 0; i < PyArray_DIMS(%(dx)s)[0]; ++i)
{
const dtype_%(dy)s* __restrict__ dy_i = (dtype_%(dy)s*) (%(dy)s->data + %(dy)s->strides[0] * i);
npy_intp Sdy = %(dy)s->strides[1]/sizeof(dtype_%(dy)s);
const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*) (%(sm)s->data + %(sm)s->strides[0] * i);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s);
dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*) (%(dx)s->data + %(dx)s->strides[0] * i);
npy_intp Sdx = %(dx)s->strides[1]/sizeof(dtype_%(dx)s);
const dtype_%(dy)s* __restrict__ dy_i = (dtype_%(dy)s*) (%(dy)s->data + PyArray_STRIDES(%(dy)s)[0] * i);
npy_intp Sdy = PyArray_STRIDES(%(dy)s)[1]/sizeof(dtype_%(dy)s);
const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*) (%(sm)s->data + PyArray_STRIDES(%(sm)s)[0] * i);
npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*) (%(dx)s->data + PyArray_STRIDES(%(dx)s)[0] * i);
npy_intp Sdx = PyArray_STRIDES(%(dx)s)[1]/sizeof(dtype_%(dx)s);
double sum_dy_times_sm = 0.;
for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j)
......@@ -825,9 +825,9 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
""",
begin_row_loop,
"""
const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
dtype_%(nll) s* __restrict__ nll_i = (dtype_%(nll)s*)(%(nll)s->data + %(nll)s->strides[0] * i);
%(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + %(am)s->strides[0] * i);
const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + PyArray_STRIDES(%(y_idx)s)[0] * i))[0];
dtype_%(nll) s* __restrict__ nll_i = (dtype_%(nll)s*)(%(nll)s->data + PyArray_STRIDES(%(nll)s)[0] * i);
%(am_type)s* __restrict__ am_i = (%(am_type)s*) (%(am)s->data + PyArray_STRIDES(%(am)s)[0] * i);
""",
inside_row_loop,
"""
......@@ -977,15 +977,15 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
for (size_t i = 0; i < PyArray_DIMS(%(dx)s)[0]; ++i)
{
const dtype_%(dnll)s dnll_i = ((dtype_%(dnll)s*)(%(dnll)s->data + %(dnll)s->strides[0] * i))[0];
const dtype_%(dnll)s dnll_i = ((dtype_%(dnll)s*)(%(dnll)s->data + PyArray_STRIDES(%(dnll)s)[0] * i))[0];
const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + %(y_idx)s->strides[0] * i))[0];
const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(%(y_idx)s->data + PyArray_STRIDES(%(y_idx)s)[0] * i))[0];
const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + %(sm)s->strides[0] * i);
npy_intp Ssm = %(sm)s->strides[1]/sizeof(dtype_%(sm)s);
const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*)(%(sm)s->data + PyArray_STRIDES(%(sm)s)[0] * i);
npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*)(%(dx)s->data + %(dx)s->strides[0] * i);
npy_intp Sdx = %(dx)s->strides[1]/sizeof(dtype_%(dx)s);
dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*)(%(dx)s->data + PyArray_STRIDES(%(dx)s)[0] * i);
npy_intp Sdx = PyArray_STRIDES(%(dx)s)[1]/sizeof(dtype_%(dx)s);
for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j)
{
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论