Add sum mode to Downsample

104eb5fa · Gijs van Tulder · a0dadf5d · 104eb5fa · 104eb5fa
--- a/theano/tensor/signal/downsample.py
+++ b/theano/tensor/signal/downsample.py
@@ -64,10 +64,10 @@ def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
            of the images, pad_h is the size of the top and bottom margins,
            and pad_w is the size of the left and right margins.
    :type padding: tuple of two ints
-    :param mode: 'max', 'average_inc_pad' or 'average_exc_pad'.
-        Operation executed on each window.  `max` always excludes the padding
-        in the computation. `average` gives you the choice to include or
-        exclude it.
+    :param mode: 'max', 'sum', 'average_inc_pad' or 'average_exc_pad'.
+        Operation executed on each window.  `max` and `sum` always exclude
+        the padding in the computation. `average` gives you the choice to
+        include or exclude it.
    :type mode: string
    """
    if input.ndim < 2:
@@ -104,7 +104,7 @@ def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
 class DownsampleFactorMax(Op):
    """For N-dimensional tensors, consider that the last two
    dimensions span images.  This Op downsamples these images by
-    taking the max or average over different patch.
+    taking the max, sum or average over different patch.

    """
    __props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode')
@@ -188,7 +188,7 @@ class DownsampleFactorMax(Op):

    def __init__(self, ds, ignore_border=False, st=None, padding=(0, 0),
                 mode='max'):
-        """ Take the max or average or different input patches.
+        """ Take the max, sum or average or different input patches.

        :param ds: downsample factor over rows and column.
                   ds indicates the pool region size.
@@ -210,8 +210,8 @@ class DownsampleFactorMax(Op):
            and pad_w is the size of the left and right margins.
        :type padding: tuple of two ints

-        :param mode: 'max', 'average_inc_pad', 'average_exc_pad'.
-            ('average_inc_pad' exclude the padding from the count,
+        :param mode: 'max', 'sum', 'average_inc_pad', 'average_exc_pad'.
+            ('average_inc_pad' excludes the padding from the count,
            'average_exc_pad' include it)

        """
@@ -232,9 +232,9 @@ class DownsampleFactorMax(Op):
        if self.padding[0] >= self.ds[0] or self.padding[1] >= self.ds[1]:
            raise NotImplementedError(
                'padding_h and padding_w must be smaller than strides')
-        if mode not in ['max', 'average_inc_pad', 'average_exc_pad']:
+        if mode not in ['max', 'average_inc_pad', 'average_exc_pad', 'sum']:
            raise ValueError(
-                "DownsampleFactorMax mode parameter only support 'max',"
+                "DownsampleFactorMax mode parameter only support 'max', 'sum',"
                " 'average_inc_pad' and 'average_exc_pad'. Got %s" % mode)
        self.mode = mode

@@ -277,7 +277,9 @@ class DownsampleFactorMax(Op):
        else:
            y = x
        func = numpy.max
-        if self.mode != 'max':
+        if self.mode == 'sum':
+            func = numpy.sum
+        elif self.mode != 'max':
            func = numpy.average

        for n in xrange(x.shape[0]):
@@ -317,7 +319,7 @@ class DownsampleFactorMax(Op):
        return ['<algorithm>']

    def c_code(self, node, name, inp, out, sub):
-        if self.mode not in ('max', 'average_exc_pad', 'average_inc_pad'):
+        if self.mode not in ('max', 'sum', 'average_exc_pad', 'average_inc_pad'):
            raise theano.gof.utils.MethodNotDefined()
        x, = inp
        z, = out
@@ -448,7 +450,7 @@ class DownsampleFactorMax(Op):
        """
        if self.mode == 'max':
            ccode += """
-                    // use the first element as the initial value of maximum
+                    // use the first element as the initial value of collector
                    collector = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,r_st,c_st)))[0];
                    // go through the pooled region in the unpadded input
                    for(int m=r_st; m<r_end; m++)
@@ -461,7 +463,7 @@ class DownsampleFactorMax(Op):
                    }
                    z[0] = collector;
            """
-        elif self.mode == 'average_exc_pad' or self.mode == 'average_inc_pad':
+        elif self.mode in ('sum', 'average_exc_pad', 'average_inc_pad'):
            ccode += """
                    // initialize the sum at zero
                    collector = ((dtype_%(x)s)(0));
@@ -475,7 +477,11 @@ class DownsampleFactorMax(Op):
                      }
                    }
            """
-            if self.mode == 'average_inc_pad' and self.ignore_border:
+            if self.mode == "sum":
+                ccode += """
+                    z[0] = collector;
+                """
+            elif self.mode == 'average_inc_pad' and self.ignore_border:
                ccode += """
                    z[0] = collector / (%(ds0)s * %(ds1)s);
                """
@@ -493,7 +499,7 @@ class DownsampleFactorMax(Op):
        return ccode % locals()

    def c_code_cache_version(self):
-        return (0, 6, 8, 1)
+        return (0, 6, 8, 3)

 class DownsampleFactorMaxGrad(Op):
    __props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode')
@@ -505,9 +511,9 @@ class DownsampleFactorMaxGrad(Op):
            st = ds
        self.st = tuple(st)
        self.padding = tuple(padding)
-        if mode not in ['max', 'average_inc_pad', 'average_exc_pad']:
+        if mode not in ['max', 'sum', 'average_inc_pad', 'average_exc_pad']:
            raise ValueError(
-                "DownsampleFactorMax mode parameter only support 'max',"
+                "DownsampleFactorMax mode parameter only support 'max', 'sum',"
                " 'average_inc_pad' and 'average_exc_pad'. Got %s" % mode)
        self.mode = mode

@@ -524,7 +530,7 @@ class DownsampleFactorMaxGrad(Op):
        return Apply(self, [x, maxout, gz], [x.type()])

    def perform(self, node, inp, out):
-        if self.mode != 'max' and self.padding != (0, 0):
+        if self.mode not in ('max', 'sum') and self.padding != (0, 0):
            raise NotImplementedError()
        x, maxout, gz = inp
        gx_stg, = out
@@ -539,6 +545,7 @@ class DownsampleFactorMaxGrad(Op):
        img_rows = x.shape[-2] + 2 * pad_h
        img_cols = x.shape[-1] + 2 * pad_w
        inc_pad = self.mode == 'average_inc_pad'
+        sum_mode = self.mode == 'sum'

        # pad the image
        if self.padding != (0, 0):
@@ -566,20 +573,23 @@ class DownsampleFactorMaxGrad(Op):
            for n in xrange(x.shape[0]):
                for k in xrange(x.shape[1]):
                    for r in xrange(pr):
-                        if inc_pad:
+                        if sum_mode or inc_pad:
                            row_st = r * st0
                        else:
                            row_st = __builtin__.max(r * st0, self.padding[0])
                        row_end = __builtin__.min(row_st + ds0, img_rows)
                        for c in xrange(pc):
-                            if inc_pad:
+                            if sum_mode or inc_pad:
                                col_st = c * st1
                            else:
                                col_st = __builtin__.max(c * st1,
                                                         self.padding[1])
                            col_end = __builtin__.min(col_st + ds1, img_cols)
-                            val = gz[n, k, r, c] / ((row_end - row_st) *
-                                                    (col_end - col_st))
+                            if sum_mode:
+                              val = gz[n, k, r, c]
+                            else:
+                              val = gz[n, k, r, c] / ((row_end - row_st) *
+                                                      (col_end - col_st))
                            gx[n, k, row_st:row_end, col_st:col_end] += val
        # unpad the image
        gx = gx[:, :, pad_h:(img_rows-pad_h), pad_w:(img_cols-pad_w)]

--- a/theano/tensor/signal/tests/test_downsample.py
+++ b/theano/tensor/signal/tests/test_downsample.py
@@ -33,7 +33,9 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
        out_shp.append(input.shape[-1] / ds[1] + yi)
        output_val = numpy.zeros(out_shp)
        func = numpy.max
-        if mode != 'max':
+        if mode == 'sum':
+            func = numpy.sum
+        elif mode != 'max':
            func = numpy.average

        for k in numpy.ndindex(*input.shape[:-2]):
@@ -76,7 +78,9 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
        tt = []
        y = pad_img(x)
        func = numpy.max
-        if mode != 'max':
+        if mode == 'sum':
+            func = numpy.sum
+        elif mode != 'max':
            func = numpy.average
        inc_pad = mode == 'average_inc_pad'

@@ -145,7 +149,9 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
        out_shp.append(out_c)

        func = numpy.max
-        if mode != 'max':
+        if mode == 'sum':
+            func = numpy.sum
+        elif mode != 'max':
            func = numpy.average

        output_val = numpy.zeros(out_shp)
@@ -169,6 +175,7 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
        for maxpoolshp, ignore_border, mode in product(maxpoolshps,
                                                       [True, False],
                                                       ['max',
+                                                        'sum',
                                                        'average_inc_pad',
                                                        'average_exc_pad']):
                # print 'maxpoolshp =', maxpoolshp
@@ -198,23 +205,23 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
        stridesizes = ((1, 1), (3, 3), (5, 7))
        # generate random images
        imval = rng.rand(4, 10, 16, 16)
+        # The same for each mode
        outputshps = ((4, 10, 16, 16), (4, 10, 6, 6), (4, 10, 4, 3),
                      (4, 10, 16, 16), (4, 10, 6, 6), (4, 10, 4, 3),
                      (4, 10, 14, 14), (4, 10, 5, 5), (4, 10, 3, 2),
                      (4, 10, 14, 14), (4, 10, 6, 6), (4, 10, 4, 3),
                      (4, 10, 12, 14), (4, 10, 4, 5), (4, 10, 3, 2),
                      (4, 10, 12, 14), (4, 10, 5, 6), (4, 10, 4, 3))
-        # The same for each mode
-        outputshps = outputshps + outputshps + outputshps
        images = tensor.dtensor4()
        indx = 0
        for mode, maxpoolshp, ignore_border in product(['max',
+                                                        'sum',
                                                        'average_inc_pad',
                                                        'average_exc_pad'],
                                                       maxpoolshps,
                                                       [True, False]):
                for stride in stridesizes:
-                    outputshp = outputshps[indx]
+                    outputshp = outputshps[indx % len(outputshps)]
                    indx += 1
                    # DownsampleFactorMax op
                    numpy_output_val = \
@@ -251,7 +258,8 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
            stride = stridesizes[indx]
            maxpoolshp = maxpoolshps[indx]
            for ignore_border, mode in product([True, False],
-                                               ['max', 'average_inc_pad',
+                                               ['max', 'sum',
+                                                'average_inc_pad',
                                                'average_exc_pad']):
                indx_out = indx * 2
                if not ignore_border:
@@ -283,7 +291,7 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
        c = 2  # channel size
        images = tensor.dtensor4()
        for indx, mode in product(numpy.arange(len(maxpoolsizes)),
-                                  ['max', 'average_inc_pad',
+                                  ['max', 'sum', 'average_inc_pad',
                                   'average_exc_pad']):
            imgsize = imgsizes[indx]
            imval = rng.rand(m, c, imgsize[0], imgsize[1]) - 0.5
@@ -486,7 +494,7 @@ class TestDownsampleFactorMax(utt.InferShapeTester):

        for maxpoolshp, ignore_border, mode in product(maxpoolshps,
                                                       [True, False],
-                                                       ['max',
+                                                       ['max', 'sum',
                                                        'average_inc_pad',
                                                        'average_exc_pad']):
                # print 'maxpoolshp =', maxpoolshp
@@ -537,7 +545,7 @@ class TestDownsampleFactorMax(utt.InferShapeTester):

        for maxpoolshp, ignore_border, mode in product(maxpoolshps,
                                                       [True, False],
-                                                       ['max',
+                                                       ['max', 'sum',
                                                        'average_inc_pad',
                                                        'average_exc_pad']):
                # print 'maxpoolshp =', maxpoolshp
@@ -574,7 +582,7 @@ class TestDownsampleFactorMax(utt.InferShapeTester):

        for maxpoolshp, ignore_border, mode in product(maxpoolshps,
                                                       [True, False],
-                                                       ['max',
+                                                       ['max', 'sum',
                                                        'average_inc_pad',
                                                        'average_exc_pad']):
                # print 'maxpoolshp =', maxpoolshp