提交 40bb994b 作者: Frédéric Bastien

Merge pull request #2882 from gvtulder/downsample-average-c-implementation

Downsample: C implementation for average, sum
...@@ -64,10 +64,10 @@ def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0), ...@@ -64,10 +64,10 @@ def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
of the images, pad_h is the size of the top and bottom margins, of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins. and pad_w is the size of the left and right margins.
:type padding: tuple of two ints :type padding: tuple of two ints
:param mode: 'max', 'average_inc_pad' or 'average_exc_pad'. :param mode: 'max', 'sum', 'average_inc_pad' or 'average_exc_pad'.
Operation executed on each window. `max` always excludes the padding Operation executed on each window. `max` and `sum` always exclude
in the computation. `average` gives you the choice to include or the padding in the computation. `average` gives you the choice to
exclude it. include or exclude it.
:type mode: string :type mode: string
""" """
if input.ndim < 2: if input.ndim < 2:
...@@ -104,7 +104,7 @@ def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0), ...@@ -104,7 +104,7 @@ def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
class DownsampleFactorMax(Op): class DownsampleFactorMax(Op):
"""For N-dimensional tensors, consider that the last two """For N-dimensional tensors, consider that the last two
dimensions span images. This Op downsamples these images by dimensions span images. This Op downsamples these images by
taking the max or average over different patch. taking the max, sum or average over different patch.
""" """
__props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode') __props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode')
...@@ -188,7 +188,7 @@ class DownsampleFactorMax(Op): ...@@ -188,7 +188,7 @@ class DownsampleFactorMax(Op):
def __init__(self, ds, ignore_border=False, st=None, padding=(0, 0), def __init__(self, ds, ignore_border=False, st=None, padding=(0, 0),
mode='max'): mode='max'):
""" Take the max or average or different input patches. """ Take the max, sum or average or different input patches.
:param ds: downsample factor over rows and column. :param ds: downsample factor over rows and column.
ds indicates the pool region size. ds indicates the pool region size.
...@@ -210,8 +210,8 @@ class DownsampleFactorMax(Op): ...@@ -210,8 +210,8 @@ class DownsampleFactorMax(Op):
and pad_w is the size of the left and right margins. and pad_w is the size of the left and right margins.
:type padding: tuple of two ints :type padding: tuple of two ints
:param mode: 'max', 'average_inc_pad', 'average_exc_pad'. :param mode: 'max', 'sum', 'average_inc_pad', 'average_exc_pad'.
('average_inc_pad' exclude the padding from the count, ('average_inc_pad' excludes the padding from the count,
'average_exc_pad' include it) 'average_exc_pad' include it)
""" """
...@@ -232,9 +232,9 @@ class DownsampleFactorMax(Op): ...@@ -232,9 +232,9 @@ class DownsampleFactorMax(Op):
if self.padding[0] >= self.ds[0] or self.padding[1] >= self.ds[1]: if self.padding[0] >= self.ds[0] or self.padding[1] >= self.ds[1]:
raise NotImplementedError( raise NotImplementedError(
'padding_h and padding_w must be smaller than strides') 'padding_h and padding_w must be smaller than strides')
if mode not in ['max', 'average_inc_pad', 'average_exc_pad']: if mode not in ['max', 'average_inc_pad', 'average_exc_pad', 'sum']:
raise ValueError( raise ValueError(
"DownsampleFactorMax mode parameter only support 'max'," "DownsampleFactorMax mode parameter only support 'max', 'sum',"
" 'average_inc_pad' and 'average_exc_pad'. Got %s" % mode) " 'average_inc_pad' and 'average_exc_pad'. Got %s" % mode)
self.mode = mode self.mode = mode
...@@ -277,7 +277,9 @@ class DownsampleFactorMax(Op): ...@@ -277,7 +277,9 @@ class DownsampleFactorMax(Op):
else: else:
y = x y = x
func = numpy.max func = numpy.max
if self.mode != 'max': if self.mode == 'sum':
func = numpy.sum
elif self.mode != 'max':
func = numpy.average func = numpy.average
for n in xrange(x.shape[0]): for n in xrange(x.shape[0]):
...@@ -317,7 +319,7 @@ class DownsampleFactorMax(Op): ...@@ -317,7 +319,7 @@ class DownsampleFactorMax(Op):
return ['<algorithm>'] return ['<algorithm>']
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
if self.mode != 'max': if self.mode not in ('max', 'sum', 'average_exc_pad', 'average_inc_pad'):
raise theano.gof.utils.MethodNotDefined() raise theano.gof.utils.MethodNotDefined()
x, = inp x, = inp
z, = out z, = out
...@@ -326,7 +328,7 @@ class DownsampleFactorMax(Op): ...@@ -326,7 +328,7 @@ class DownsampleFactorMax(Op):
ds0, ds1 = self.ds ds0, ds1 = self.ds
st0, st1 = self.st st0, st1 = self.st
pd0, pd1 = self.padding pd0, pd1 = self.padding
return """ ccode = """
int typenum = PyArray_ObjectType((PyObject*)%(x)s, 0); int typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
int z_r, z_c; // shape of the output int z_r, z_c; // shape of the output
int r, c; // shape of the padded_input int r, c; // shape of the padded_input
...@@ -409,7 +411,7 @@ class DownsampleFactorMax(Op): ...@@ -409,7 +411,7 @@ class DownsampleFactorMax(Op):
// used for indexing a pool region inside the input // used for indexing a pool region inside the input
int r_st, r_end, c_st, c_end; int r_st, r_end, c_st, c_end;
dtype_%(x)s maximum; // temp var for maximum value in a region dtype_%(x)s collector; // temp var for the value in a region
if (z_r && z_c) if (z_r && z_c)
{ {
for(int b=0; b<PyArray_DIMS(%(x)s)[0]; b++){ for(int b=0; b<PyArray_DIMS(%(x)s)[0]; b++){
...@@ -445,28 +447,59 @@ class DownsampleFactorMax(Op): ...@@ -445,28 +447,59 @@ class DownsampleFactorMax(Op):
{ {
c_end = c_end > c ? c : c_end; c_end = c_end > c ? c : c_end;
} }
// use the first element as the initial value of maximum """
maximum = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,r_st,c_st)))[0]; if self.mode == 'max':
ccode += """
// use the first element as the initial value of collector
collector = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,r_st,c_st)))[0];
// go through the pooled region in the unpadded input // go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m++) for(int m=r_st; m<r_end; m++)
{ {
for(int n=c_st; n<c_end; n++) for(int n=c_st; n<c_end; n++)
{ {
dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,m,n)))[0]; dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,m,n)))[0];
maximum = (a > maximum) ? a : maximum; collector = (a > collector) ? a : collector;
} }
} }
z[0] = maximum; z[0] = collector;
"""
elif self.mode in ('sum', 'average_exc_pad', 'average_inc_pad'):
ccode += """
// initialize the sum at zero
collector = ((dtype_%(x)s)(0));
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m++)
{
for(int n=c_st; n<c_end; n++)
{
dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,m,n)))[0];
collector += a;
} }
} }
"""
if self.mode == "sum":
ccode += """
z[0] = collector;
"""
elif self.mode == 'average_inc_pad' and self.ignore_border:
ccode += """
z[0] = collector / (%(ds0)s * %(ds1)s);
"""
else:
ccode += """
z[0] = collector / ((r_end-r_st)*(c_end-c_st));
"""
ccode += """
} }
} }
} }
""" % locals() }
}
"""
return ccode % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (0, 6) return (0, 6, 8, 3)
class DownsampleFactorMaxGrad(Op): class DownsampleFactorMaxGrad(Op):
__props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode') __props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode')
...@@ -478,9 +511,9 @@ class DownsampleFactorMaxGrad(Op): ...@@ -478,9 +511,9 @@ class DownsampleFactorMaxGrad(Op):
st = ds st = ds
self.st = tuple(st) self.st = tuple(st)
self.padding = tuple(padding) self.padding = tuple(padding)
if mode not in ['max', 'average_inc_pad', 'average_exc_pad']: if mode not in ['max', 'sum', 'average_inc_pad', 'average_exc_pad']:
raise ValueError( raise ValueError(
"DownsampleFactorMax mode parameter only support 'max'," "DownsampleFactorMax mode parameter only support 'max', 'sum',"
" 'average_inc_pad' and 'average_exc_pad'. Got %s" % mode) " 'average_inc_pad' and 'average_exc_pad'. Got %s" % mode)
self.mode = mode self.mode = mode
...@@ -497,7 +530,7 @@ class DownsampleFactorMaxGrad(Op): ...@@ -497,7 +530,7 @@ class DownsampleFactorMaxGrad(Op):
return Apply(self, [x, maxout, gz], [x.type()]) return Apply(self, [x, maxout, gz], [x.type()])
def perform(self, node, inp, out): def perform(self, node, inp, out):
if self.mode != 'max' and self.padding != (0, 0): if self.mode not in ('max', 'sum') and self.padding != (0, 0):
raise NotImplementedError() raise NotImplementedError()
x, maxout, gz = inp x, maxout, gz = inp
gx_stg, = out gx_stg, = out
...@@ -512,6 +545,7 @@ class DownsampleFactorMaxGrad(Op): ...@@ -512,6 +545,7 @@ class DownsampleFactorMaxGrad(Op):
img_rows = x.shape[-2] + 2 * pad_h img_rows = x.shape[-2] + 2 * pad_h
img_cols = x.shape[-1] + 2 * pad_w img_cols = x.shape[-1] + 2 * pad_w
inc_pad = self.mode == 'average_inc_pad' inc_pad = self.mode == 'average_inc_pad'
sum_mode = self.mode == 'sum'
# pad the image # pad the image
if self.padding != (0, 0): if self.padding != (0, 0):
...@@ -539,18 +573,21 @@ class DownsampleFactorMaxGrad(Op): ...@@ -539,18 +573,21 @@ class DownsampleFactorMaxGrad(Op):
for n in xrange(x.shape[0]): for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]): for k in xrange(x.shape[1]):
for r in xrange(pr): for r in xrange(pr):
if inc_pad: if sum_mode or inc_pad:
row_st = r * st0 row_st = r * st0
else: else:
row_st = __builtin__.max(r * st0, self.padding[0]) row_st = __builtin__.max(r * st0, self.padding[0])
row_end = __builtin__.min(row_st + ds0, img_rows) row_end = __builtin__.min(row_st + ds0, img_rows)
for c in xrange(pc): for c in xrange(pc):
if inc_pad: if sum_mode or inc_pad:
col_st = c * st1 col_st = c * st1
else: else:
col_st = __builtin__.max(c * st1, col_st = __builtin__.max(c * st1,
self.padding[1]) self.padding[1])
col_end = __builtin__.min(col_st + ds1, img_cols) col_end = __builtin__.min(col_st + ds1, img_cols)
if sum_mode:
val = gz[n, k, r, c]
else:
val = gz[n, k, r, c] / ((row_end - row_st) * val = gz[n, k, r, c] / ((row_end - row_st) *
(col_end - col_st)) (col_end - col_st))
gx[n, k, row_st:row_end, col_st:col_end] += val gx[n, k, row_st:row_end, col_st:col_end] += val
...@@ -573,7 +610,7 @@ class DownsampleFactorMaxGrad(Op): ...@@ -573,7 +610,7 @@ class DownsampleFactorMaxGrad(Op):
else: else:
return [theano.tensor.zeros_like(x), return [theano.tensor.zeros_like(x),
theano.tensor.zeros_like(maxout), theano.tensor.zeros_like(maxout),
theano.gradients.grad_not_implemented( theano.gradient.grad_not_implemented(
self, 2, gz, 'Hessian not implemented with padding')] self, 2, gz, 'Hessian not implemented with padding')]
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论