Commit f641f02e authored by Pascal Lamblin

Merge pull request #2917 from yaoli/maxpool_c_code

[MRG] implement padding for max pool grad grad
...@@ -602,12 +602,12 @@ class DownsampleFactorMaxGrad(Op): ...@@ -602,12 +602,12 @@ class DownsampleFactorMaxGrad(Op):
def grad(self, inp, grads): def grad(self, inp, grads):
x, maxout, gz = inp x, maxout, gz = inp
ggx, = grads ggx, = grads
if self.padding == (0, 0) and self.mode == 'max': if self.mode == 'max':
return [theano.tensor.zeros_like(x), return [theano.tensor.zeros_like(x),
theano.tensor.zeros_like(maxout), theano.tensor.zeros_like(maxout),
DownsampleFactorMaxGradGrad( DownsampleFactorMaxGradGrad(
self.ds, ignore_border=self.ignore_border, self.ds, ignore_border=self.ignore_border,
st=self.st)(x, maxout, ggx)] st=self.st, padding=self.padding)(x, maxout, ggx)]
else: else:
return [theano.tensor.zeros_like(x), return [theano.tensor.zeros_like(x),
theano.tensor.zeros_like(maxout), theano.tensor.zeros_like(maxout),
...@@ -733,10 +733,10 @@ class DownsampleFactorMaxGrad(Op): ...@@ -733,10 +733,10 @@ class DownsampleFactorMaxGrad(Op):
return (0, 7) return (0, 7)
class DownsampleFactorMaxGradGrad(Op): class DownsampleFactorMaxGradGrad(Op):
__props__ = ('ds', 'ignore_border', 'st') __props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode')
@staticmethod @staticmethod
def out_shape(imgshape, ds, ignore_border=False, st=None): def out_shape(imgshape, ds, ignore_border=False, st=None, padding=(0, 0)):
"""Return the shape of the output from this op, for input of given """Return the shape of the output from this op, for input of given
shape and flags. shape and flags.
...@@ -757,6 +757,11 @@ class DownsampleFactorMaxGradGrad(Op): ...@@ -757,6 +757,11 @@ class DownsampleFactorMaxGradGrad(Op):
extra row/col of partial downsampling (False) or ignore it (True). extra row/col of partial downsampling (False) or ignore it (True).
:type ignore_border: bool :type ignore_border: bool
:param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins.
:type padding: tuple of two ints
:rtype: list :rtype: list
:returns: the shape of the output from this op, for input of given :returns: the shape of the output from this op, for input of given
shape. This will have the same length as imgshape, but with last shape. This will have the same length as imgshape, but with last
...@@ -769,6 +774,8 @@ class DownsampleFactorMaxGradGrad(Op): ...@@ -769,6 +774,8 @@ class DownsampleFactorMaxGradGrad(Op):
if st is None: if st is None:
st = ds st = ds
r, c = imgshape[-2:] r, c = imgshape[-2:]
r += padding[0] * 2
c += padding[1] * 2
if ignore_border: if ignore_border:
out_r = (r - ds[0]) // st[0] + 1 out_r = (r - ds[0]) // st[0] + 1
...@@ -805,12 +812,25 @@ class DownsampleFactorMaxGradGrad(Op): ...@@ -805,12 +812,25 @@ class DownsampleFactorMaxGradGrad(Op):
rval = list(imgshape[:-2]) + [nr, nc] rval = list(imgshape[:-2]) + [nr, nc]
return rval return rval
def __init__(self, ds, ignore_border, st=None): def __init__(self, ds, ignore_border, st=None, padding=(0,0), mode='max'):
self.ds = tuple(ds) self.ds = tuple(ds)
self.ignore_border = ignore_border if not all([isinstance(d, int) for d in ds]):
raise ValueError(
"DownsampleFactorMax downsample parameters must be ints."
" Got %s" % str(ds))
if st is None: if st is None:
st = ds st = ds
assert isinstance(st, (tuple, list))
self.st = tuple(st) self.st = tuple(st)
self.ignore_border = ignore_border
self.padding = tuple(padding)
if self.padding != (0, 0) and not ignore_border:
raise NotImplementedError(
'padding works only with ignore_border=True')
if self.padding[0] >= self.ds[0] or self.padding[1] >= self.ds[1]:
raise NotImplementedError(
'padding_h and padding_w must be smaller than strides')
self.mode = mode
def make_node(self, x, maxout, gz): def make_node(self, x, maxout, gz):
# make_node should only be called by the grad function of # make_node should only be called by the grad function of
...@@ -825,28 +845,42 @@ class DownsampleFactorMaxGradGrad(Op): ...@@ -825,28 +845,42 @@ class DownsampleFactorMaxGradGrad(Op):
return Apply(self, [x, maxout, gz], [x.type()]) return Apply(self, [x, maxout, gz], [x.type()])
def perform(self, node, inp, out): def perform(self, node, inp, out):
if self.mode != 'max':
raise theano.gof.utils.MethodNotDefined()
x, maxout, ggx = inp x, maxout, ggx = inp
z, = out z, = out
if len(x.shape) != 4: if len(x.shape) != 4:
raise NotImplementedError( raise NotImplementedError(
'DownsampleFactorMaxGradGrad requires 4D input for now') 'DownsampleFactorMaxGradGrad requires 4D input for now')
z_shape = self.out_shape(x.shape, self.ds, self.ignore_border, self.st) z_shape = self.out_shape(x.shape, self.ds, self.ignore_border,
self.st, self.padding)
if (z[0] is None) or (z[0].shape != z_shape): if (z[0] is None) or (z[0].shape != z_shape):
z[0] = numpy.zeros(self.out_shape(x.shape, self.ds, z[0] = numpy.zeros(z_shape, dtype=x.dtype)
self.ignore_border, self.st), ggz = z[0] # grad wrt maxout_grad has the same shape as maxout
dtype=x.dtype)
ggz = z[0]
# number of pooling output rows # number of pooling output rows
pr = ggz.shape[-2] pr = ggz.shape[-2]
# number of pooling output cols # number of pooling output cols
pc = ggz.shape[-1] pc = ggz.shape[-1]
ds0, ds1 = self.ds ds0, ds1 = self.ds
st0, st1 = self.st st0, st1 = self.st
img_rows = x.shape[-2] pd0, pd1 = self.padding
img_cols = x.shape[-1] img_rows = x.shape[-2] + 2 * pd0
img_cols = x.shape[-1] + 2 * pd1
# pad the image and its gradients
if self.padding != (0, 0):
y_padded = numpy.zeros(
(x.shape[0], x.shape[1], img_rows, img_cols),
dtype=x.dtype) + x.min() - 1
y_padded[:, :, pd0:(img_rows-pd0), pd1:(img_cols-pd1)] = x
ggx_padded = numpy.zeros(
(x.shape[0], x.shape[1], img_rows, img_cols),
dtype=x.dtype)
ggx_padded[:, :, pd0:(img_rows-pd0), pd1:(img_cols-pd1)] = ggx
else:
y_padded = x
ggx_padded = ggx
for n in xrange(x.shape[0]): for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]): for k in xrange(x.shape[1]):
for r in xrange(pr): for r in xrange(pr):
...@@ -857,8 +891,94 @@ class DownsampleFactorMaxGradGrad(Op): ...@@ -857,8 +891,94 @@ class DownsampleFactorMaxGradGrad(Op):
col_end = builtins.min(col_st + ds1, img_cols) col_end = builtins.min(col_st + ds1, img_cols)
for row_ind in xrange(row_st, row_end): for row_ind in xrange(row_st, row_end):
for col_ind in xrange(col_st, col_end): for col_ind in xrange(col_st, col_end):
if (maxout[n, k, r, c] == x[n, k, row_ind, col_ind]): if (maxout[n, k, r, c] == y_padded[n, k, row_ind, col_ind]):
ggz[n, k, r, c] = ggx[n, k, row_ind, col_ind] ggz[n, k, r, c] = ggx_padded[n, k, row_ind, col_ind]
def infer_shape(self, node, in_shapes): def infer_shape(self, node, in_shapes):
return [in_shapes[0]] return [in_shapes[0]]
def c_code(self, node, name, inp, out, sub):
    """Return C code for the max-pool grad-grad computation.

    Only ``mode == 'max'`` has a C implementation; any other mode
    raises MethodNotDefined so Theano falls back to ``perform``.

    :param inp: C variable names for (x, maxout, ggx).
    :param out: C variable name for z, the grad wrt maxout_grad
        (same shape as maxout).
    :param sub: dict of extra substitutions; ``sub['fail']`` is the
        error-return snippet used on allocation failure.
    :returns: the C implementation as a template-substituted string.
    """
    if self.mode != 'max':
        raise theano.gof.utils.MethodNotDefined()
    x, maxout, ggx = inp
    z, = out  # the grad of grad
    fail = sub['fail']
    ignore_border = int(self.ignore_border)
    ds0, ds1 = self.ds
    st0, st1 = self.st
    pd0, pd1 = self.padding
    return """
    int z_typenum = PyArray_ObjectType((PyObject*)%(maxout)s, 0);
    int z_r, z_c;
    z_r = PyArray_DIMS(%(maxout)s)[2];
    z_c = PyArray_DIMS(%(maxout)s)[3];
    int r, c; // shape of the padded_input
    r = PyArray_DIMS(%(x)s)[2];
    c = PyArray_DIMS(%(x)s)[3];
    r += %(pd0)s * 2;
    c += %(pd1)s * 2;
    // allocating memory for output
    // NOTE: the rank test uses PyArray_NDIM; the previous form
    // (*PyArray_DIMS(z) != 4) dereferenced the dims array and compared
    // the FIRST DIMENSION against 4, never the rank.
    if ((!%(z)s)
      || !PyArray_ISCONTIGUOUS(%(z)s)
      || PyArray_NDIM(%(z)s) != 4
      ||(PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(maxout)s)[0])
      ||(PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(maxout)s)[1])
      ||(PyArray_DIMS(%(z)s)[2] != PyArray_DIMS(%(maxout)s)[2])
      ||(PyArray_DIMS(%(z)s)[3] != PyArray_DIMS(%(maxout)s)[3])
      )
    {
      Py_XDECREF(%(z)s);
      %(z)s = (PyArrayObject*) PyArray_ZEROS(4, PyArray_DIMS(%(maxout)s), z_typenum,0);
      if (!%(z)s)
      {
        PyErr_SetString(PyExc_MemoryError,
                        "DownsampleFactorMaxGradGrad: failed to allocate output");
        %(fail)s;
      }
    }
    else {
      PyArray_FILLWBYTE(%(z)s, 0);
    }
    dtype_%(maxout)s maximum; // temp var for maximum value in a region
    int r_st, r_end, c_st, c_end; // used to index into the input img x
    for(int b=0; b<PyArray_DIMS(%(x)s)[0]; b++){
      for(int k=0; k<PyArray_DIMS(%(x)s)[1]; k++){
        for(int i=0; i< z_r; i++){
          r_st = i * %(st0)s;
          r_end = r_st + %(ds0)s;
          // skip the padding
          r_st = r_st < %(pd0)s ? %(pd0)s : r_st;
          r_end = r_end > (r - %(pd0)s) ? r - %(pd0)s : r_end;
          // from padded_img space to img space
          r_st -= %(pd0)s;
          r_end -= %(pd0)s;
          for(int j=0; j<z_c; j++){
            c_st = j * %(st1)s;
            c_end = c_st + %(ds1)s;
            // skip the padding
            c_st = c_st < %(pd1)s ? %(pd1)s : c_st;
            c_end = c_end > (c - %(pd1)s) ? c - %(pd1)s : c_end;
            // from padding_img space into img space
            c_st -= %(pd1)s;
            c_end -= %(pd1)s;
            // the maximum value
            maximum = ((dtype_%(maxout)s*)(PyArray_GETPTR4(%(maxout)s,b,k,i,j)))[0];
            // z at this position
            dtype_%(z)s * z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, b, k, i, j)));
            // go through the pooled region in the unpadded input
            for(int m=r_st; m<r_end; m++)
            {
              for(int n=c_st; n<c_end; n++)
              {
                dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,m,n)))[0];
                dtype_%(ggx)s * ggx = (
                  (dtype_%(ggx)s*)(PyArray_GETPTR4(%(ggx)s, b, k, m, n)));
                if (a == maximum){
                  z[0] += ggx[0];
                }
              }
            }
          }
        }
      }
    }
    """ % locals()

def c_code_cache_version(self):
    """Version of the generated C code.

    Bumped to (0, 2): the output-rank check was corrected to use
    PyArray_NDIM and an allocation-failure branch was added, so cached
    binaries compiled from the old template must be invalidated.
    """
    return (0, 2)
...@@ -8,7 +8,9 @@ import theano ...@@ -8,7 +8,9 @@ import theano
import theano.tensor as tensor import theano.tensor as tensor
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.tensor.signal.downsample import (DownsampleFactorMax, max_pool_2d, from theano.tensor.signal.downsample import (DownsampleFactorMax, max_pool_2d,
DownsampleFactorMaxGrad, max_pool_2d_same_size) DownsampleFactorMaxGrad,
DownsampleFactorMaxGradGrad,
max_pool_2d_same_size)
from theano import function from theano import function
...@@ -482,7 +484,36 @@ class TestDownsampleFactorMax(utt.InferShapeTester): ...@@ -482,7 +484,36 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
if numpy.prod(grad_shape) == 0: if numpy.prod(grad_shape) == 0:
continue continue
utt.verify_grad(mp, [imval, grad_val], rng=rng) utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxPaddingStride_grad_grad(self):
    """Verify the gradient of DownsampleFactorMaxGrad with padding
    via finite differences, over several (image, pool, stride,
    padding) configurations with ignore_border=True."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    # Case-wise parallel configurations: image size, pool window,
    # stride, and zero-padding for each test case.
    cases = zip(((10, 10), (10, 5), (5, 5)),
                ((5, 3), (3, 5), (3, 3)),
                ((3, 2), (2, 3), (3, 3)),
                ((2, 2), (2, 1), (2, 2)))
    for imgsize, maxpoolsize, stridesize, paddingsize in cases:
        imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0
        # Upstream gradient must match the pooled output's shape.
        grad_shape = DownsampleFactorMaxGradGrad.out_shape(
            imval.shape, maxpoolsize, st=stridesize,
            ignore_border=True, padding=paddingsize)
        grad_val = rng.rand(*grad_shape) * 10.0

        def mp(input, grad):
            out = DownsampleFactorMax(
                maxpoolsize, ignore_border=True,
                st=stridesize,
                padding=paddingsize,
            )(input)
            grad_op = DownsampleFactorMaxGrad(
                maxpoolsize, ignore_border=True,
                st=stridesize, padding=paddingsize)
            return grad_op(input, out, grad)

        utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMax_hessian(self): def test_DownsampleFactorMax_hessian(self):
# Example provided by Frans Cronje, see # Example provided by Frans Cronje, see
# https://groups.google.com/d/msg/theano-users/qpqUy_3glhw/JMwIvlN5wX4J # https://groups.google.com/d/msg/theano-users/qpqUy_3glhw/JMwIvlN5wX4J
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment