提交 fb3b1fcf 作者: Pascal Lamblin

Merge pull request #2696 from yaoli/maxpool_c_code

c code of maxpool
...@@ -287,77 +287,149 @@ class DownsampleFactorMax(Op): ...@@ -287,77 +287,149 @@ class DownsampleFactorMax(Op):
st=self.st, padding=self.padding)( st=self.st, padding=self.padding)(
x, maxout, gz)] x, maxout, gz)]
def c_headers(self):
return ['<algorithm>']
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
# No implementation is currently available for the case where
# the stride size and the pooling size are different.
# An exception is raised for such a case.
if self.ds != self.st or self.padding != (0, 0):
raise theano.gof.utils.MethodNotDefined()
x, = inp x, = inp
z, = out z, = out
fail = sub['fail'] fail = sub['fail']
ignore_border = int(self.ignore_border) ignore_border = int(self.ignore_border)
ds0, ds1 = self.ds ds0, ds1 = self.ds
st0, st1 = self.st
pd0, pd1 = self.padding
return """ return """
int typenum = PyArray_ObjectType((PyObject*)%(x)s, 0); int typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
int x_shp0_usable; int z_r, z_c; // shape of the output
int x_shp1_usable; int r, c; // shape of the padded_input
int z_shp0, z_shp1;
if(PyArray_NDIM(%(x)s)!=4) if(PyArray_NDIM(%(x)s)!=4)
{ {
PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray"); PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray");
%(fail)s; %(fail)s;
} }
z_shp0 = PyArray_DIMS(%(x)s)[2] / %(ds0)s; r = PyArray_DIMS(%(x)s)[2];
z_shp1 = PyArray_DIMS(%(x)s)[3] / %(ds1)s; c = PyArray_DIMS(%(x)s)[3];
r += %(pd0)s * 2;
c += %(pd1)s * 2;
if (%(pd0)s != 0 && %(pd1)s != 0 && !%(ignore_border)s)
{
PyErr_SetString(PyExc_ValueError,
"padding must be (0,0) when ignore border is False");
%(fail)s;
}
if (%(ignore_border)s) if (%(ignore_border)s)
{ {
x_shp0_usable = z_shp0 * %(ds0)s;
x_shp1_usable = z_shp1 * %(ds1)s; // '/' in C is different from '/' in python
if (r - %(ds0)s < 0)
{
z_r = 0;
}
else
{
z_r = (r - %(ds0)s) / %(st0)s + 1;
}
if (c - %(ds1)s < 0)
{
z_c = 0;
}
else
{
z_c = (c - %(ds1)s) / %(st1)s + 1;
}
} }
else else
{ {
z_shp0 += (PyArray_DIMS(%(x)s)[2] %% %(ds0)s) ? 1 : 0; // decide how many rows the output has
z_shp1 += (PyArray_DIMS(%(x)s)[3] %% %(ds1)s) ? 1 : 0; if (%(st0)s >= %(ds0)s)
x_shp0_usable = PyArray_DIMS(%(x)s)[2]; {
x_shp1_usable = PyArray_DIMS(%(x)s)[3]; z_r = (r - 1) / %(st0)s + 1;
}
else
{
z_r = std::max(0, (r - 1 - %(ds0)s) / %(st0)s + 1) + 1;
}
// decide how many columns the output has
if (%(st1)s >= %(ds1)s)
{
z_c = (c - 1) / %(st1)s + 1;
}
else
{
z_c = std::max(0, (c - 1 - %(ds1)s) / %(st1)s + 1) + 1;
}
} }
// memory allocation of z if necessary
if ((!%(z)s) if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4 || *PyArray_DIMS(%(z)s)!=4
||(PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(x)s)[0]) ||(PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(x)s)[0])
||(PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(x)s)[1]) ||(PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(x)s)[1])
||(PyArray_DIMS(%(z)s)[2] != z_shp0) ||(PyArray_DIMS(%(z)s)[2] != z_r)
||(PyArray_DIMS(%(z)s)[3] != z_shp1) ||(PyArray_DIMS(%(z)s)[3] != z_c)
) )
{ {
if (%(z)s) Py_XDECREF(%(z)s); if (%(z)s) Py_XDECREF(%(z)s);
npy_intp dims[4] = {0,0,0,0}; npy_intp dims[4] = {0,0,0,0};
dims[0]=PyArray_DIMS(%(x)s)[0]; dims[0]=PyArray_DIMS(%(x)s)[0];
dims[1]=PyArray_DIMS(%(x)s)[1]; dims[1]=PyArray_DIMS(%(x)s)[1];
dims[2]=z_shp0; dims[2]=z_r;
dims[3]=z_shp1; dims[3]=z_c;
//TODO: zeros not necessary //TODO: zeros not necessary
%(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0); %(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
} }
if (z_shp0 && z_shp1) // used for indexing a pool region inside the input
int r_st, r_end, c_st, c_end;
dtype_%(x)s maximum; // temp var for maximum value in a region
if (z_r && z_c)
{ {
for(int b=0;b<PyArray_DIMS(%(x)s)[0];b++){ for(int b=0; b<PyArray_DIMS(%(x)s)[0]; b++){
for(int k=0;k<PyArray_DIMS(%(x)s)[1];k++){ for(int k=0; k<PyArray_DIMS(%(x)s)[1]; k++){
int mini_i = 0; for(int i=0; i< z_r; i++){
int zi = 0; r_st = i * %(st0)s;
for(int i=0;i< x_shp0_usable; i++){ r_end = r_st + %(ds0)s;
int mini_j = 0; // skip the padding
int zj = 0; r_st = r_st < %(pd0)s ? %(pd0)s : r_st;
for(int j=0; j<x_shp1_usable; j++){ r_end = r_end > (r - %(pd0)s) ? r - %(pd0)s : r_end;
dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,i,j)))[0]; // from padded_img space to img space
dtype_%(z)s * __restrict__ z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,b,k,zi,zj))); r_st -= %(pd0)s;
z[0] = (((mini_j|mini_i) == 0) || z[0] < a) ? a : z[0]; r_end -= %(pd0)s;
mini_j = ((mini_j + 1) == %(ds1)s) ? 0 : mini_j+1;
zj += (mini_j == 0); // handle the case where no padding, ignore border is True
if (%(ignore_border)s)
{
r_end = r_end > r ? r : r_end;
}
for(int j=0; j<z_c; j++){
c_st = j * %(st1)s;
c_end = c_st + %(ds1)s;
// skip the padding
c_st = c_st < %(pd1)s ? %(pd1)s : c_st;
c_end = c_end > (c - %(pd1)s) ? c - %(pd1)s : c_end;
dtype_%(z)s * z = (
(dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, b, k, i, j)));
// change coordinates from padding_img space into img space
c_st -= %(pd1)s;
c_end -= %(pd1)s;
// handle the case where no padding, ignore border is True
if (%(ignore_border)s)
{
c_end = c_end > c ? c : c_end;
}
// use the first element as the initial value of maximum
maximum = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,r_st,c_st)))[0];
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m++)
{
for(int n=c_st; n<c_end; n++)
{
dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,m,n)))[0];
maximum = (a > maximum) ? a : maximum;
}
}
z[0] = maximum;
} }
mini_i = ((mini_i + 1) == %(ds0)s) ? 0 : mini_i+1;
zi += (mini_i == 0);
} }
} }
} }
...@@ -365,7 +437,7 @@ class DownsampleFactorMax(Op): ...@@ -365,7 +437,7 @@ class DownsampleFactorMax(Op):
""" % locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (0, 2) return (0, 6)
class DownsampleFactorMaxGrad(Op): class DownsampleFactorMaxGrad(Op):
...@@ -409,7 +481,6 @@ class DownsampleFactorMaxGrad(Op): ...@@ -409,7 +481,6 @@ class DownsampleFactorMaxGrad(Op):
pad_w = self.padding[1] pad_w = self.padding[1]
img_rows = x.shape[-2] + 2 * pad_h img_rows = x.shape[-2] + 2 * pad_h
img_cols = x.shape[-1] + 2 * pad_w img_cols = x.shape[-1] + 2 * pad_w
# pad the image # pad the image
if self.padding != (0, 0): if self.padding != (0, 0):
fill = x.min()-1 fill = x.min()-1
......
import unittest import unittest
import __builtin__ import __builtin__
import numpy import numpy
import theano
import theano.tensor as tensor import theano.tensor as tensor
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.tensor.signal.downsample import (DownsampleFactorMax, max_pool_2d, from theano.tensor.signal.downsample import (DownsampleFactorMax, max_pool_2d,
...@@ -467,11 +468,11 @@ class TestDownsampleFactorMax(utt.InferShapeTester): ...@@ -467,11 +468,11 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
test_input_array = numpy.array([[[ test_input_array = numpy.array([[[
[1., 2., 3., 4.], [1., 2., 3., 4.],
[5., 6., 7., 8.] [5., 6., 7., 8.]
]]]) ]]]).astype(theano.config.floatX)
test_answer_array = numpy.array([[[ test_answer_array = numpy.array([[[
[0., 0., 0., 0.], [0., 0., 0., 0.],
[0., 6., 0., 8.] [0., 6., 0., 8.]
]]]) ]]]).astype(theano.config.floatX)
input = tensor.tensor4(name='input') input = tensor.tensor4(name='input')
patch_size = (2, 2) patch_size = (2, 2)
op = max_pool_2d_same_size(input, patch_size) op = max_pool_2d_same_size(input, patch_size)
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论