提交 54a1c069 authored 作者: Cesar Laurent's avatar Cesar Laurent

Corrected c_code.

上级 c5d46d06
...@@ -9,7 +9,6 @@ from __future__ import absolute_import, print_function, division ...@@ -9,7 +9,6 @@ from __future__ import absolute_import, print_function, division
import warnings import warnings
import numpy import numpy
from six import integer_types
from six.moves import xrange from six.moves import xrange
import six.moves.builtins as builtins import six.moves.builtins as builtins
import theano import theano
...@@ -397,14 +396,19 @@ class Pool(OpenMPOp): ...@@ -397,14 +396,19 @@ class Pool(OpenMPOp):
z, = out z, = out
fail = sub['fail'] fail = sub['fail']
ignore_border = int(self.ignore_border) ignore_border = int(self.ignore_border)
ws0, ws1 = ws
st0, st1 = stride
pd0, pd1 = pad
if self.openmp: if self.openmp:
omp_parallel = '#pragma omp parallel for private(r_st, r_end, c_st, c_end, collector) schedule(static)' omp_parallel = '#pragma omp parallel for private(r_st, r_end, c_st, c_end, collector) schedule(static)'
else: else:
omp_parallel = '' omp_parallel = ''
ccode = """ ccode = """
// Getting ws, stride and pad
int ws0, ws1, st0, st1, pd0, pd1;
ws0 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 0));
ws1 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 1));
st0 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 0));
st1 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 1));
pd0 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 0));
pd1 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 1));
int typenum = PyArray_ObjectType((PyObject*)%(x)s, 0); int typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
int z_r, z_c; // shape of the output int z_r, z_c; // shape of the output
int r, c; // shape of the padded_input int r, c; // shape of the padded_input
...@@ -415,9 +419,9 @@ class Pool(OpenMPOp): ...@@ -415,9 +419,9 @@ class Pool(OpenMPOp):
} }
r = PyArray_DIMS(%(x)s)[2]; r = PyArray_DIMS(%(x)s)[2];
c = PyArray_DIMS(%(x)s)[3]; c = PyArray_DIMS(%(x)s)[3];
r += %(pd0)s * 2; r += pd0 * 2;
c += %(pd1)s * 2; c += pd1 * 2;
if (%(pd0)s != 0 && %(pd1)s != 0 && !%(ignore_border)s) if (pd0 != 0 && pd1 != 0 && !%(ignore_border)s)
{ {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"padding must be (0,0) when ignore border is False"); "padding must be (0,0) when ignore border is False");
...@@ -426,42 +430,42 @@ class Pool(OpenMPOp): ...@@ -426,42 +430,42 @@ class Pool(OpenMPOp):
if (%(ignore_border)s) if (%(ignore_border)s)
{ {
// '/' in C is different from '/' in python // '/' in C is different from '/' in python
if (r - %(ws0)s < 0) if (r - ws0 < 0)
{ {
z_r = 0; z_r = 0;
} }
else else
{ {
z_r = (r - %(ws0)s) / %(st0)s + 1; z_r = (r - ws0) / st0 + 1;
} }
if (c - %(ws1)s < 0) if (c - ws1 < 0)
{ {
z_c = 0; z_c = 0;
} }
else else
{ {
z_c = (c - %(ws1)s) / %(st1)s + 1; z_c = (c - ws1) / st1 + 1;
} }
} }
else else
{ {
// decide how many rows the output has // decide how many rows the output has
if (%(st0)s >= %(ws0)s) if (st0 >= ws0)
{ {
z_r = (r - 1) / %(st0)s + 1; z_r = (r - 1) / st0 + 1;
} }
else else
{ {
z_r = std::max(0, (r - 1 - %(ws0)s + %(st0)s) / %(st0)s) + 1; z_r = std::max(0, (r - 1 - ws0 + st0) / st0) + 1;
} }
// decide how many columns the output has // decide how many columns the output has
if (%(st1)s >= %(ws1)s) if (st1 >= ws1)
{ {
z_c = (c - 1) / %(st1)s + 1; z_c = (c - 1) / st1 + 1;
} }
else else
{ {
z_c = std::max(0, (c - 1 - %(ws1)s + %(st0)s) / %(st1)s) + 1; z_c = std::max(0, (c - 1 - ws1 + st0) / st1) + 1;
} }
assert(z_r > 0); assert(z_r > 0);
assert(z_c > 0); assert(z_c > 0);
...@@ -494,30 +498,30 @@ class Pool(OpenMPOp): ...@@ -494,30 +498,30 @@ class Pool(OpenMPOp):
int b = t %% PyArray_DIMS(%(x)s)[0]; int b = t %% PyArray_DIMS(%(x)s)[0];
int k = t / PyArray_DIMS(%(x)s)[0]; int k = t / PyArray_DIMS(%(x)s)[0];
for(int i=0; i < z_r; i++){ for(int i=0; i < z_r; i++){
r_st = i * %(st0)s; r_st = i * st0;
r_end = r_st + %(ws0)s; r_end = r_st + ws0;
// skip the padding // skip the padding
r_st = r_st < %(pd0)s ? %(pd0)s : r_st; r_st = r_st < pd0 ? pd0 : r_st;
r_end = r_end > (r - %(pd0)s) ? r - %(pd0)s : r_end; r_end = r_end > (r - pd0) ? r - pd0 : r_end;
// from padded_img space to img space // from padded_img space to img space
r_st -= %(pd0)s; r_st -= pd0;
r_end -= %(pd0)s; r_end -= pd0;
// handle the case where no padding, ignore border is True // handle the case where no padding, ignore border is True
if (%(ignore_border)s) if (%(ignore_border)s)
{ {
r_end = r_end > r ? r : r_end; r_end = r_end > r ? r : r_end;
} }
for(int j=0; j<z_c; j++){ for(int j=0; j<z_c; j++){
c_st = j * %(st1)s; c_st = j * st1;
c_end = c_st + %(ws1)s; c_end = c_st + ws1;
// skip the padding // skip the padding
c_st = c_st < %(pd1)s ? %(pd1)s : c_st; c_st = c_st < pd1 ? pd1 : c_st;
c_end = c_end > (c - %(pd1)s) ? c - %(pd1)s : c_end; c_end = c_end > (c - pd1) ? c - pd1 : c_end;
dtype_%(z)s * z = ( dtype_%(z)s * z = (
(dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, b, k, i, j))); (dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, b, k, i, j)));
// change coordinates from padding_img space into img space // change coordinates from padding_img space into img space
c_st -= %(pd1)s; c_st -= pd1;
c_end -= %(pd1)s; c_end -= pd1;
// handle the case where no padding, ignore border is True // handle the case where no padding, ignore border is True
if (%(ignore_border)s) if (%(ignore_border)s)
{ {
...@@ -559,7 +563,7 @@ class Pool(OpenMPOp): ...@@ -559,7 +563,7 @@ class Pool(OpenMPOp):
""" """
elif self.mode == 'average_inc_pad' and self.ignore_border: elif self.mode == 'average_inc_pad' and self.ignore_border:
ccode += """ ccode += """
z[0] = collector / (%(ws0)s * %(ws1)s); z[0] = collector / (ws0 * ws1);
""" """
else: else:
ccode += """ ccode += """
...@@ -784,9 +788,6 @@ class MaxPoolGrad(PoolGrad): ...@@ -784,9 +788,6 @@ class MaxPoolGrad(PoolGrad):
gx, = out gx, = out
fail = sub['fail'] fail = sub['fail']
ignore_border = int(self.ignore_border) ignore_border = int(self.ignore_border)
ws0, ws1 = ws
st0, st1 = stride
pd0, pd1 = pad
if self.openmp: if self.openmp:
omp_parallel = '#pragma omp parallel for private(r_st, r_end, c_st, c_end, maximum) schedule(static)' omp_parallel = '#pragma omp parallel for private(r_st, r_end, c_st, c_end, maximum) schedule(static)'
else: else:
...@@ -816,14 +817,22 @@ class MaxPoolGrad(PoolGrad): ...@@ -816,14 +817,22 @@ class MaxPoolGrad(PoolGrad):
PyErr_SetString(PyExc_ValueError, "gz must be a 4d ndarray"); PyErr_SetString(PyExc_ValueError, "gz must be a 4d ndarray");
%(fail)s; %(fail)s;
} }
// Getting ws, stride and pad
int ws0, ws1, st0, st1, pd0, pd1;
ws0 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 0));
ws1 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 1));
st0 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 0));
st1 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 1));
pd0 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 0));
pd1 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 1));
int z_r, z_c; int z_r, z_c;
z_r = PyArray_DIMS(%(z)s)[2]; z_r = PyArray_DIMS(%(z)s)[2];
z_c = PyArray_DIMS(%(z)s)[3]; z_c = PyArray_DIMS(%(z)s)[3];
int r, c; // shape of the padded_input int r, c; // shape of the padded_input
r = PyArray_DIMS(%(x)s)[2]; r = PyArray_DIMS(%(x)s)[2];
c = PyArray_DIMS(%(x)s)[3]; c = PyArray_DIMS(%(x)s)[3];
r += %(pd0)s * 2; r += pd0 * 2;
c += %(pd1)s * 2; c += pd1 * 2;
// allocating memory for gx // allocating memory for gx
if ((!%(gx)s) if ((!%(gx)s)
|| !PyArray_ISCONTIGUOUS(%(gx)s) || !PyArray_ISCONTIGUOUS(%(gx)s)
...@@ -849,23 +858,23 @@ class MaxPoolGrad(PoolGrad): ...@@ -849,23 +858,23 @@ class MaxPoolGrad(PoolGrad):
int b = t %% PyArray_DIMS(%(x)s)[0]; int b = t %% PyArray_DIMS(%(x)s)[0];
int k = t / PyArray_DIMS(%(x)s)[0]; int k = t / PyArray_DIMS(%(x)s)[0];
for(int i=0; i < z_r; i++){ for(int i=0; i < z_r; i++){
r_st = i * %(st0)s; r_st = i * st0;
r_end = r_st + %(ws0)s; r_end = r_st + ws0;
// skip the padding // skip the padding
r_st = r_st < %(pd0)s ? %(pd0)s : r_st; r_st = r_st < pd0 ? pd0 : r_st;
r_end = r_end > (r - %(pd0)s) ? r - %(pd0)s : r_end; r_end = r_end > (r - pd0) ? r - pd0 : r_end;
// from padded_img space to img space // from padded_img space to img space
r_st -= %(pd0)s; r_st -= pd0;
r_end -= %(pd0)s; r_end -= pd0;
for(int j=0; j<z_c; j++){ for(int j=0; j<z_c; j++){
c_st = j * %(st1)s; c_st = j * st1;
c_end = c_st + %(ws1)s; c_end = c_st + ws1;
// skip the padding // skip the padding
c_st = c_st < %(pd1)s ? %(pd1)s : c_st; c_st = c_st < pd1 ? pd1 : c_st;
c_end = c_end > (c - %(pd1)s) ? c - %(pd1)s : c_end; c_end = c_end > (c - pd1) ? c - pd1 : c_end;
// change coordinates from padding_img space into img space // change coordinates from padding_img space into img space
c_st -= %(pd1)s; c_st -= pd1;
c_end -= %(pd1)s; c_end -= pd1;
// the maximum value // the maximum value
maximum = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,b,k,i,j)))[0]; maximum = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,b,k,i,j)))[0];
// the gradient corresponding to this maximum value in z // the gradient corresponding to this maximum value in z
...@@ -1096,14 +1105,19 @@ class DownsampleFactorMaxGradGrad(OpenMPOp): ...@@ -1096,14 +1105,19 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
z, = out # the grad of grad z, = out # the grad of grad
fail = sub['fail'] fail = sub['fail']
ignore_border = int(self.ignore_border) ignore_border = int(self.ignore_border)
ws0, ws1 = ws
st0, st1 = stride
pd0, pd1 = pad
if self.openmp: if self.openmp:
omp_parallel = '#pragma omp parallel for private(r_st, r_end, c_st, c_end, maximum) schedule(static)' omp_parallel = '#pragma omp parallel for private(r_st, r_end, c_st, c_end, maximum) schedule(static)'
else: else:
omp_parallel = '' omp_parallel = ''
return """ return """
// Getting ws, stride and pad
int ws0, ws1, st0, st1, pd0, pd1;
ws0 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 0));
ws1 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 1));
st0 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 0));
st1 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 1));
pd0 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 0));
pd1 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 1));
int z_typenum = PyArray_ObjectType((PyObject*)%(maxout)s, 0); int z_typenum = PyArray_ObjectType((PyObject*)%(maxout)s, 0);
int z_r, z_c; int z_r, z_c;
z_r = PyArray_DIMS(%(maxout)s)[2]; z_r = PyArray_DIMS(%(maxout)s)[2];
...@@ -1111,8 +1125,8 @@ class DownsampleFactorMaxGradGrad(OpenMPOp): ...@@ -1111,8 +1125,8 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
int r, c; // shape of the padded_input int r, c; // shape of the padded_input
r = PyArray_DIMS(%(x)s)[2]; r = PyArray_DIMS(%(x)s)[2];
c = PyArray_DIMS(%(x)s)[3]; c = PyArray_DIMS(%(x)s)[3];
r += %(pd0)s * 2; r += pd0 * 2;
c += %(pd1)s * 2; c += pd1 * 2;
// allocating memory for output // allocating memory for output
if ((!%(z)s) if ((!%(z)s)
|| !PyArray_ISCONTIGUOUS(%(z)s) || !PyArray_ISCONTIGUOUS(%(z)s)
...@@ -1136,23 +1150,23 @@ class DownsampleFactorMaxGradGrad(OpenMPOp): ...@@ -1136,23 +1150,23 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
int b = t %% PyArray_DIMS(%(x)s)[0]; int b = t %% PyArray_DIMS(%(x)s)[0];
int k = t / PyArray_DIMS(%(x)s)[0]; int k = t / PyArray_DIMS(%(x)s)[0];
for(int i=0; i < z_r; i++){ for(int i=0; i < z_r; i++){
r_st = i * %(st0)s; r_st = i * st0;
r_end = r_st + %(ws0)s; r_end = r_st + ws0;
// skip the padding // skip the padding
r_st = r_st < %(pd0)s ? %(pd0)s : r_st; r_st = r_st < pd0 ? pd0 : r_st;
r_end = r_end > (r - %(pd0)s) ? r - %(pd0)s : r_end; r_end = r_end > (r - pd0) ? r - pd0 : r_end;
// from padded_img space to img space // from padded_img space to img space
r_st -= %(pd0)s; r_st -= pd0;
r_end -= %(pd0)s; r_end -= pd0;
for(int j=0; j<z_c; j++){ for(int j=0; j<z_c; j++){
c_st = j * %(st1)s; c_st = j * st1;
c_end = c_st + %(ws1)s; c_end = c_st + ws1;
// skip the padding // skip the padding
c_st = c_st < %(pd1)s ? %(pd1)s : c_st; c_st = c_st < pd1 ? pd1 : c_st;
c_end = c_end > (c - %(pd1)s) ? c - %(pd1)s : c_end; c_end = c_end > (c - pd1) ? c - pd1 : c_end;
// from padding_img space into img space // from padding_img space into img space
c_st -= %(pd1)s; c_st -= pd1;
c_end -= %(pd1)s; c_end -= pd1;
// the maximum value // the maximum value
maximum = ((dtype_%(maxout)s*)(PyArray_GETPTR4(%(maxout)s,b,k,i,j)))[0]; maximum = ((dtype_%(maxout)s*)(PyArray_GETPTR4(%(maxout)s,b,k,i,j)))[0];
// z at this position // z at this position
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论