Merge pull request #3139 from sebastien-j/average_pooling

Average pooling

Merge pull request #3139 from sebastien-j/average_pooling
6e7a904e · Pascal Lamblin · 079181cf · 8298b5b1 · 6e7a904e · 6e7a904e
--- a/theano/sandbox/cuda/dnn.py
+++ b/theano/sandbox/cuda/dnn.py
@@ -13,7 +13,7 @@ from theano.compile.ops import shape_i
 from theano.configparser import AddConfigVar, EnumStr
 from theano.tensor.nnet import SoftmaxGrad
 from theano.tensor.signal.downsample import (
-    DownsampleFactorMax, DownsampleFactorMaxGrad)
+    DownsampleFactorMax, MaxPoolGrad, AveragePoolGrad)
 from theano.sandbox.cuda import GpuOp
 from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
                                           host_from_gpu,
@@ -2204,11 +2204,11 @@ if True:
                                     desc)]

    @register_opt('cudnn')
-    @local_optimizer([DownsampleFactorMaxGrad])
+    @local_optimizer([MaxPoolGrad])
    def local_pool_dnn_grad_stride(node):
        if not dnn_available():
            return
-        if isinstance(node.op, DownsampleFactorMaxGrad):
+        if isinstance(node.op, MaxPoolGrad):
            if not node.op.ignore_border:
                return
            inp, out, inp_grad = node.inputs
@@ -2228,6 +2228,31 @@ if True:
                                       desc)
                return [host_from_gpu(ret)]

+    @register_opt('cudnn')
+    @local_optimizer([AveragePoolGrad])
+    def local_avgpool_dnn_grad_stride(node):
+        if not dnn_available():
+            return
+        if isinstance(node.op, AveragePoolGrad):
+            if not node.op.ignore_border:
+                return
+            inp, inp_grad = node.inputs
+            ds = node.op.ds
+            st = node.op.st
+            pad = node.op.padding
+            mode = node.op.mode
+
+            if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
+                (inp_grad.owner and isinstance(inp_grad.owner.op,
+                                               HostFromGpu))):
+                desc = GpuDnnPoolDesc(ws=ds, stride=st, mode=mode, pad=pad)()
+                ret = GpuDnnPoolGrad()(gpu_contiguous(inp),
+                                       gpu_contiguous(numpy.empty((1,1,1,1),
+                                           dtype=numpy.float32)),
+                                       gpu_contiguous(inp_grad),
+                                       desc)
+                return [host_from_gpu(ret)]
+
    @register_opt('cudnn')
    @local_optimizer([GpuSoftmax])
    def local_softmax_dnn(node):

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -120,7 +120,8 @@ cpu_ops_moved_to_gpu = [
    tensor.blas.Dot22, tensor.blas.Dot22Scalar, tensor.blas.Gemm,
    tensor.blas.Gemv, tensor.blas.Ger, tensor.nnet.conv.ConvOp,
    tensor.signal.downsample.DownsampleFactorMax,
-    tensor.signal.downsample.DownsampleFactorMaxGrad,
+    tensor.signal.downsample.MaxPoolGrad,
+    tensor.signal.downsample.AveragePoolGrad,
    theano.tensor.nnet.neighbours.Images2Neibs,
    tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias,
    tensor.nnet.CrossentropySoftmax1HotWithBiasDx,
@@ -1765,9 +1766,9 @@ def local_gpu_downsample_factor_max(node):


 @register_opt()
-@local_optimizer([downsample.DownsampleFactorMaxGrad])
+@local_optimizer([downsample.MaxPoolGrad])
 def local_gpu_downsample_factor_max_grad(node):
-    if (isinstance(node.op, downsample.DownsampleFactorMaxGrad) and
+    if (isinstance(node.op, downsample.MaxPoolGrad) and
        node.op.ds == node.op.st):
        assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding',
                                     'mode')

--- a/theano/sandbox/cuda/tests/test_dnn.py
+++ b/theano/sandbox/cuda/tests/test_dnn.py
@@ -10,7 +10,7 @@ import theano.tensor as T
 import theano.tests.unittest_tools as utt
 from theano.sandbox.neighbours import images2neibs
 from theano.tensor.signal.downsample import max_pool_2d
-from theano.tensor.signal.downsample import DownsampleFactorMaxGrad
+from theano.tensor.signal.downsample import MaxPoolGrad, AveragePoolGrad
 import theano.sandbox.cuda.dnn as dnn
 from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty
 from theano.sandbox.cuda import float32_shared_constructor as shared
@@ -316,8 +316,12 @@ def test_pooling():
                              ignore_border=True, mode=mode)
            fc = theano.function([x], theano.grad(out.sum(), x),
                                 mode=mode_without_gpu)
-            assert any([isinstance(node.op, DownsampleFactorMaxGrad)
-                        for node in fc.maker.fgraph.toposort()])
+            if mode == 'max':
+                assert any([isinstance(node.op, MaxPoolGrad)
+                            for node in fc.maker.fgraph.toposort()])
+            else:
+                assert any([isinstance(node.op, AveragePoolGrad)
+                            for node in fc.maker.fgraph.toposort()])
            c_out = fc(data)
            assert numpy.allclose(c_out, g_out)


--- a/theano/sandbox/gpuarray/dnn.py
+++ b/theano/sandbox/gpuarray/dnn.py
@@ -13,7 +13,7 @@ from theano.compile.ops import shape_i
 from theano.configparser import AddConfigVar, EnumStr, StrParam
 from theano.tensor.nnet import SoftmaxGrad
 from theano.tensor.signal.downsample import (
-    DownsampleFactorMax, DownsampleFactorMaxGrad)
+    DownsampleFactorMax, MaxPoolGrad, AveragePoolGrad)

 from . import pygpu, init_dev
 from .basic_ops import (as_gpuarray_variable,
@@ -1659,7 +1659,7 @@ def local_pool_dnn_alternative(node):


 @register_opt('cudnn')
-@op_lifter([DownsampleFactorMaxGrad])
+@op_lifter([MaxPoolGrad])
 def local_pool_dnn_grad_stride(node):
    if not dnn_available():
        return
@@ -1678,6 +1678,26 @@ def local_pool_dnn_grad_stride(node):
                            desc)


+@register_opt('cudnn')
+@op_lifter([AveragePoolGrad])
+def local_avg_pool_dnn_grad_stride(node):
+    if not dnn_available():
+        return
+    if not node.op.ignore_border:
+        return
+    inp, inp_grad = node.inputs
+    ds = node.op.ds
+    st = node.op.st
+    pad = node.op.padding
+    mode = node.op.mode
+
+    desc = GpuDnnPoolDesc(ws=ds, stride=st, mode=mode, pad=pad)()
+    return GpuDnnPoolGrad()(gpu_contiguous(inp),
+                            gpu_contiguous(numpy.empty((1, 1, 1, 1), dtype=numpy.float32)),
+                            gpu_contiguous(inp_grad),
+                            desc)
+
+
 @register_opt('cudnn')
 @local_optimizer([GpuSoftmax])
 def local_softmax_dnn(node):

--- a/theano/sandbox/gpuarray/tests/test_dnn.py
+++ b/theano/sandbox/gpuarray/tests/test_dnn.py
@@ -10,7 +10,7 @@ import theano.tensor as T
 import theano.tests.unittest_tools as utt
 from theano.sandbox.neighbours import images2neibs
 from theano.tensor.signal.downsample import max_pool_2d
-from theano.tensor.signal.downsample import DownsampleFactorMaxGrad
+from theano.tensor.signal.downsample import MaxPoolGrad, AveragePoolGrad

 from .. import dnn
 from ..basic_ops import GpuAllocEmpty
@@ -264,8 +264,12 @@ def test_pooling():
                              ignore_border=True, mode=mode)
            fc = theano.function([x], theano.grad(out.sum(), x),
                                 mode=mode_without_gpu)
-            assert any([isinstance(node.op, DownsampleFactorMaxGrad)
-                        for node in fc.maker.fgraph.toposort()])
+            if mode == 'max':
+                assert any([isinstance(node.op, MaxPoolGrad)
+                            for node in fc.maker.fgraph.toposort()])
+            else:
+                assert any([isinstance(node.op, AveragePoolGrad)
+                            for node in fc.maker.fgraph.toposort()])
            c_out = fc(data)
            assert numpy.allclose(c_out, g_out)


--- a/theano/tensor/signal/downsample.py
+++ b/theano/tensor/signal/downsample.py
@@ -14,6 +14,7 @@ import numpy
 import theano
 from theano import gof, Op, tensor, Variable, Apply

+from theano.tensor.opt import register_canonicalize

 def max_pool2D(*args, **kwargs):
    import sys
@@ -36,7 +37,7 @@ def max_pool_2d_same_size(input, patch_size):
        (2,2) will retain only one non-zero value per patch of 4 values.
    """
    output = DownsampleFactorMax(patch_size, True)(input)
-    outs = DownsampleFactorMaxGrad(patch_size, True)(input, output, output)
+    outs = MaxPoolGrad(patch_size, True)(input, output, output)
    return outs


@@ -309,13 +310,18 @@ class DownsampleFactorMax(Op):
    def grad(self, inp, grads):
        x, = inp
        gz, = grads
-        maxout = self(x)
-        return [DownsampleFactorMaxGrad(self.ds,
-                                        ignore_border=self.ignore_border,
-                                        st=self.st, padding=self.padding,
-                                        mode=self.mode)(
-                                            x, maxout, gz)]
-
+        if self.mode == 'max':
+            maxout = self(x)
+            return [MaxPoolGrad(self.ds,
+                                ignore_border=self.ignore_border,
+                                st=self.st, padding=self.padding)(
+                                    x, maxout, gz)]
+        else:
+            return [AveragePoolGrad(self.ds,
+                                    ignore_border=self.ignore_border,
+                                    st=self.st, padding=self.padding,
+                                    mode=self.mode)(
+                                        x, gz)]
    def c_headers(self):
        return ['<algorithm>']

@@ -502,9 +508,86 @@ class DownsampleFactorMax(Op):
    def c_code_cache_version(self):
        return (0, 6, 8, 3)

-class DownsampleFactorMaxGrad(Op):
+class PoolGrad(Op):
    __props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode')

+    @staticmethod
+    def out_shape(imgshape, ds, ignore_border=False, st=None, padding=(0, 0)):
+        """Return the shape of the output from this op, for input of given
+        shape and flags.
+
+        :param imgshape: the shape of a tensor of images. The last two elements
+            are interpreted as the number of rows, and the number of cols.
+        :type imgshape: tuple, list, or similar of integer or
+            scalar Theano variable.
+
+        :param ds: downsample factor over rows and columns
+                   this parameter indicates the size of the pooling region
+        :type ds: list or tuple of two ints
+
+        :param st: the stride size. This is the distance between the pooling
+                   regions. If it's set to None, in which case it equlas ds.
+        :type st: list or tuple of two ints
+
+        :param ignore_border: if ds doesn't divide imgshape, do we include an
+            extra row/col of partial downsampling (False) or ignore it (True).
+        :type ignore_border: bool
+
+        :param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
+            of the images, pad_h is the size of the top and bottom margins,
+            and pad_w is the size of the left and right margins.
+        :type padding: tuple of two ints
+
+        :rtype: list
+        :returns: the shape of the output from this op, for input of given
+            shape.  This will have the same length as imgshape, but with last
+            two elements reduced as per the downsampling & ignore_border flags.
+        """
+        if len(imgshape) < 2:
+            raise TypeError('imgshape must have at least two elements '
+                            '(rows, cols)')
+
+        if st is None:
+            st = ds
+        r, c = imgshape[-2:]
+        r += padding[0] * 2
+        c += padding[1] * 2
+
+        if ignore_border:
+            out_r = (r - ds[0]) // st[0] + 1
+            out_c = (c - ds[1]) // st[1] + 1
+            if isinstance(r, theano.Variable):
+                nr = tensor.maximum(out_r, 0)
+            else:
+                nr = numpy.maximum(out_r, 0)
+            if isinstance(c, theano.Variable):
+                nc = tensor.maximum(out_c, 0)
+            else:
+                nc = numpy.maximum(out_c, 0)
+        else:
+            if isinstance(r, theano.Variable):
+                nr = tensor.switch(tensor.ge(st[0], ds[0]),
+                                   (r - 1) // st[0] + 1,
+                                   tensor.maximum(0, (r - 1 - ds[0])
+                                                  // st[0] + 1) + 1)
+            elif st[0] >= ds[0]:
+                nr = (r - 1) // st[0] + 1
+            else:
+                nr = max(0, (r - 1 - ds[0]) // st[0] + 1) + 1
+
+            if isinstance(c, theano.Variable):
+                nc = tensor.switch(tensor.ge(st[1], ds[1]),
+                                   (c - 1) // st[1] + 1,
+                                   tensor.maximum(0, (c - 1 - ds[1])
+                                                  // st[1] + 1) + 1)
+            elif st[1] >= ds[1]:
+                nc = (c - 1) // st[1] + 1
+            else:
+                nc = max(0, (c - 1 - ds[1]) // st[1] + 1) + 1
+
+        rval = list(imgshape[:-2]) + [nr, nc]
+        return rval
+
    def __init__(self, ds, ignore_border, st=None, padding=(0, 0), mode='max'):
        self.ds = tuple(ds)
        self.ignore_border = ignore_border
@@ -518,6 +601,15 @@ class DownsampleFactorMaxGrad(Op):
                " 'average_inc_pad' and 'average_exc_pad'. Got %s" % mode)
        self.mode = mode

+    def infer_shape(self, node, in_shapes):
+        return [in_shapes[0]]
+
+
+class MaxPoolGrad(PoolGrad):
+
+    def __init__(self, ds, ignore_border, st=None, padding=(0, 0), mode='max'):
+        PoolGrad.__init__(self, ds, ignore_border, st, padding, mode)
+
    def make_node(self, x, maxout, gz):
        # make_node should only be called by the grad function of
        # DownsampleFactorMax, so these asserts should not fail.
@@ -531,8 +623,7 @@ class DownsampleFactorMaxGrad(Op):
        return Apply(self, [x, maxout, gz], [x.type()])

    def perform(self, node, inp, out):
-        if self.mode not in ('max', 'sum') and self.padding != (0, 0):
-            raise NotImplementedError()
+        assert self.mode == 'max'
        x, maxout, gz = inp
        gx_stg, = out
        # number of pooling output rows
@@ -545,8 +636,6 @@ class DownsampleFactorMaxGrad(Op):
        pad_w = self.padding[1]
        img_rows = x.shape[-2] + 2 * pad_h
        img_cols = x.shape[-1] + 2 * pad_w
-        inc_pad = self.mode == 'average_inc_pad'
-        sum_mode = self.mode == 'sum'

        # pad the image
        if self.padding != (0, 0):
@@ -557,66 +646,33 @@ class DownsampleFactorMaxGrad(Op):
        else:
            y = x
        gx = numpy.zeros_like(y)
-        if self.mode == 'max':
-            for n in xrange(x.shape[0]):
-                for k in xrange(x.shape[1]):
-                    for r in xrange(pr):
-                        row_st = builtins.max(r * st0, self.padding[0])
-                        row_end = builtins.min(row_st + ds0, img_rows)
-                        for c in xrange(pc):
-                            col_st = builtins.max(c * st1, self.padding[1])
-                            col_end = builtins.min(col_st + ds1, img_cols)
-                            for row_ind in xrange(row_st, row_end):
-                                for col_ind in xrange(col_st, col_end):
-                                    if (maxout[n, k, r, c] == y[n, k, row_ind, col_ind]):
-                                        gx[n, k, row_ind, col_ind] += gz[n, k, r, c]
-        else:
-            for n in xrange(x.shape[0]):
-                for k in xrange(x.shape[1]):
-                    for r in xrange(pr):
-                        if sum_mode or inc_pad:
-                            row_st = r * st0
-                        else:
-                            row_st = builtins.max(r * st0, self.padding[0])
-                        row_end = builtins.min(row_st + ds0, img_rows)
-                        for c in xrange(pc):
-                            if sum_mode or inc_pad:
-                                col_st = c * st1
-                            else:
-                                col_st = builtins.max(c * st1,
-                                                         self.padding[1])
-                            col_end = builtins.min(col_st + ds1, img_cols)
-                            if sum_mode:
-                              val = gz[n, k, r, c]
-                            else:
-                              val = gz[n, k, r, c] / ((row_end - row_st) *
-                                                      (col_end - col_st))
-                            gx[n, k, row_st:row_end, col_st:col_end] += val
+        for n in xrange(x.shape[0]):
+            for k in xrange(x.shape[1]):
+                for r in xrange(pr):
+                    row_st = builtins.max(r * st0, self.padding[0])
+                    row_end = builtins.min(row_st + ds0, img_rows)
+                    for c in xrange(pc):
+                        col_st = builtins.max(c * st1, self.padding[1])
+                        col_end = builtins.min(col_st + ds1, img_cols)
+                        for row_ind in xrange(row_st, row_end):
+                            for col_ind in xrange(col_st, col_end):
+                                if (maxout[n, k, r, c] == y[n, k, row_ind, col_ind]):
+                                    gx[n, k, row_ind, col_ind] += gz[n, k, r, c]
        # unpad the image
        gx = gx[:, :, pad_h:(img_rows-pad_h), pad_w:(img_cols-pad_w)]
        gx_stg[0] = gx

-    def infer_shape(self, node, in_shapes):
-        return [in_shapes[0]]
-
    def grad(self, inp, grads):
        x, maxout, gz = inp
        ggx, = grads
-        if self.mode == 'max':
-            return [theano.tensor.zeros_like(x),
-                    theano.tensor.zeros_like(maxout),
-                    DownsampleFactorMaxGradGrad(
-                        self.ds, ignore_border=self.ignore_border,
-                        st=self.st, padding=self.padding)(x, maxout, ggx)]
-        else:
-            return [theano.tensor.zeros_like(x),
-                    theano.tensor.zeros_like(maxout),
-                    theano.gradient.grad_not_implemented(
-                        self, 2, gz, 'Hessian not implemented with padding')]
+        return [theano.tensor.zeros_like(x),
+                theano.tensor.zeros_like(maxout),
+                DownsampleFactorMaxGradGrad(
+                    self.ds, ignore_border=self.ignore_border,
+                    st=self.st, padding=self.padding)(x, maxout, ggx)]

    def c_code(self, node, name, inp, out, sub):
-        if self.mode != 'max':
-            raise theano.gof.utils.MethodNotDefined()
+        assert self.mode == 'max'
        x, z, gz = inp
        gx, = out
        fail = sub['fail']
@@ -732,6 +788,89 @@ class DownsampleFactorMaxGrad(Op):
    def c_code_cache_version(self):
        return (0, 7)

+DownsampleFactorMaxGrad = MaxPoolGrad
+
+class AveragePoolGrad(PoolGrad):
+
+    def __init__(self, ds, ignore_border, st=None, padding=(0, 0), mode='average_inc_pad'):
+        assert mode in ['sum', 'average_inc_pad', 'average_exc_pad']
+        PoolGrad.__init__(self, ds, ignore_border, st, padding, mode)
+
+    def make_node(self, x, gz):
+        # make_node should only be called by the grad function of
+        # DownsampleFactorMax, so these asserts should not fail.
+        assert isinstance(x, Variable) and x.ndim == 4
+        assert isinstance(gz, Variable) and gz.ndim == 4
+        x = tensor.as_tensor_variable(x)
+        gz = tensor.as_tensor_variable(gz)
+
+        return Apply(self, [x, gz], [x.type()])
+
+    def perform(self, node, inp, out):
+        if self.mode == 'average_exc_pad' and self.padding != (0, 0):
+            raise NotImplementedError()
+        x, gz = inp
+        gx_stg, = out
+        z_shape = self.out_shape(x.shape, self.ds, self.ignore_border, self.st,
+                                 self.padding)
+        if (gx_stg[0] is None) or (gx_stg[0].shape != z_shape):
+            gx_stg[0] = numpy.empty(z_shape, dtype=x.dtype)
+        zz = gx_stg[0]
+        # number of pooling output rows
+        pr = zz.shape[-2]
+        # number of pooling output cols
+        pc = zz.shape[-1]
+        ds0, ds1 = self.ds
+        st0, st1 = self.st
+        pad_h = self.padding[0]
+        pad_w = self.padding[1]
+        img_rows = x.shape[-2] + 2 * pad_h
+        img_cols = x.shape[-1] + 2 * pad_w
+        inc_pad = self.mode == 'average_inc_pad'
+        sum_mode = self.mode == 'sum'
+
+        # pad the image
+        if self.padding != (0, 0):
+            y = numpy.zeros(
+                (x.shape[0], x.shape[1], img_rows, img_cols),
+                dtype=x.dtype)
+            y[:, :, pad_h:(img_rows-pad_h), pad_w:(img_cols-pad_w)] = x
+        else:
+            y = x
+        gx = numpy.zeros_like(y)
+        for n in xrange(x.shape[0]):
+            for k in xrange(x.shape[1]):
+                for r in xrange(pr):
+                    if sum_mode or inc_pad:
+                        row_st = r * st0
+                    else:
+                        row_st = builtins.max(r * st0, self.padding[0])
+                    row_end = builtins.min(row_st + ds0, img_rows)
+                    for c in xrange(pc):
+                        if sum_mode or inc_pad:
+                            col_st = c * st1
+                        else:
+                            col_st = builtins.max(c * st1,
+                                                     self.padding[1])
+                        col_end = builtins.min(col_st + ds1, img_cols)
+                        if sum_mode:
+                          val = gz[n, k, r, c]
+                        else:
+                          val = gz[n, k, r, c] / ((row_end - row_st) *
+                                                  (col_end - col_st))
+                        gx[n, k, row_st:row_end, col_st:col_end] += val
+        # unpad the image
+        gx = gx[:, :, pad_h:(img_rows-pad_h), pad_w:(img_cols-pad_w)]
+        gx_stg[0] = gx
+
+    def grad(self, inp, grads):
+        x, gz = inp
+        ggx, = grads
+        return [theano.tensor.zeros_like(x),
+                DownsampleFactorMax(
+                    self.ds, ignore_border=self.ignore_border,
+                    st=self.st, padding=self.padding, mode=self.mode)(ggx)]
+
 class DownsampleFactorMaxGradGrad(Op):
    __props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode')

@@ -831,22 +970,21 @@ class DownsampleFactorMaxGradGrad(Op):
            raise NotImplementedError(
                'padding_h and padding_w must be smaller than strides')
        self.mode = mode
+        assert self.mode == 'max'

    def make_node(self, x, maxout, gz):
        # make_node should only be called by the grad function of
-        # DownsampleFactorMaxGrad, so these asserts should not fail.
-        assert isinstance(x, Variable) and x.ndim == 4
-        assert isinstance(maxout, Variable) and maxout.ndim == 4
-        assert isinstance(gz, Variable) and gz.ndim == 4
+        # MaxPoolGrad, so these asserts should not fail.
        x = tensor.as_tensor_variable(x)
        maxout = tensor.as_tensor_variable(maxout)
        gz = tensor.as_tensor_variable(gz)
+        assert x.ndim == 4
+        assert maxout.ndim == 4
+        assert gz.ndim == 4

        return Apply(self, [x, maxout, gz], [x.type()])

    def perform(self, node, inp, out):
-        if self.mode != 'max':
-            raise theano.gof.utils.MethodNotDefined()
        x, maxout, ggx = inp
        z, = out
        if len(x.shape) != 4:
@@ -864,7 +1002,7 @@ class DownsampleFactorMaxGradGrad(Op):
        ds0, ds1 = self.ds
        st0, st1 = self.st
        pd0, pd1 = self.padding
-        img_rows = x.shape[-2] + 2 * pd0 
+        img_rows = x.shape[-2] + 2 * pd0
        img_cols = x.shape[-1] + 2 * pd1

        # pad the image and its gradients
@@ -877,7 +1015,7 @@ class DownsampleFactorMaxGradGrad(Op):
                (x.shape[0], x.shape[1], img_rows, img_cols),
                dtype=x.dtype)
            ggx_padded[:, :, pd0:(img_rows-pd0), pd1:(img_cols-pd1)] = ggx
-            
+
        else:
            y_padded = x
            ggx_padded = ggx
@@ -893,7 +1031,7 @@ class DownsampleFactorMaxGradGrad(Op):
                            for col_ind in xrange(col_st, col_end):
                                if (maxout[n, k, r, c] == y_padded[n, k, row_ind, col_ind]):
                                    ggz[n, k, r, c] = ggx_padded[n, k, row_ind, col_ind]
-                                    
+
    def infer_shape(self, node, in_shapes):
        return [in_shapes[0]]

@@ -901,7 +1039,7 @@ class DownsampleFactorMaxGradGrad(Op):
        if self.mode != 'max':
            raise theano.gof.utils.MethodNotDefined()
        x, maxout, ggx = inp
-        z, = out # the grad of grad 
+        z, = out # the grad of grad
        fail = sub['fail']
        ignore_border = int(self.ignore_border)
        ds0, ds1 = self.ds
@@ -970,7 +1108,7 @@ class DownsampleFactorMaxGradGrad(Op):
                          (dtype_%(ggx)s*)(PyArray_GETPTR4(%(ggx)s, b, k, m, n)));
                        if (a == maximum){
                          z[0] += ggx[0];
-                        }  
+                        }
                      }
                    }
                  }
@@ -978,7 +1116,21 @@ class DownsampleFactorMaxGradGrad(Op):
              }
         }
        """%locals()
-    
+
    def c_code_cache_version(self):
        return (0,1)
-    
+
+@register_canonicalize('fast_compile')
+@gof.local_optimizer([MaxPoolGrad])
+def local_average_pool_grad(node):
+    # To assure backward compatibility with
+    # DownsampleFactorMaxGrad
+    if (not isinstance(node.op, MaxPoolGrad) or node.op.mode not in
+            ['sum','average_exc_pad', 'average_inc_pad']):
+        return False
+    return [AveragePoolGrad(ds=node.op.ds,
+                            ignore_border=node.op.ignore_border,
+                            st=node.op.st,
+                            padding=node.op.padding,
+                            mode=node.op.mode)(node.inputs[0],
+                                               node.inputs[2])]
--- a/theano/tensor/signal/tests/test_downsample.py
+++ b/theano/tensor/signal/tests/test_downsample.py
@@ -8,6 +8,7 @@ import theano
 import theano.tensor as tensor
 from theano.tests import unittest_tools as utt
 from theano.tensor.signal.downsample import (DownsampleFactorMax, max_pool_2d,
+                                             MaxPoolGrad, AveragePoolGrad,
                                             DownsampleFactorMaxGrad,
                                             DownsampleFactorMaxGradGrad,
                                             max_pool_2d_same_size)
@@ -417,12 +418,35 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
                def mp(input, grad):
                    out = DownsampleFactorMax(
                        maxpoolshp, ignore_border=ignore_border)(input)
-                    grad_op = DownsampleFactorMaxGrad(
+                    grad_op = MaxPoolGrad(
                        maxpoolshp, ignore_border=ignore_border)
                    return grad_op(input, out, grad)

                utt.verify_grad(mp, [imval, grad_val], rng=rng)

+    def test_AveragePoolGrad_grad(self):
+        rng = numpy.random.RandomState(utt.fetch_seed())
+        avgpoolshps = ((1, 1), (3, 2), (2, 3))
+        imval = rng.rand(2, 3, 3, 4) * 10.0
+        # more variance means numeric gradient will be more accurate
+
+        for avgpoolshp in avgpoolshps:
+            for ignore_border in [True, False]:
+                for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
+                    # print 'maxpoolshp =', maxpoolshp
+                    # print 'ignore_border =', ignore_border
+                    # The shape of the gradient will be the shape of the output
+                    grad_shape = DownsampleFactorMax.out_shape(
+                        imval.shape, avgpoolshp, ignore_border=ignore_border)
+                    grad_val = rng.rand(*grad_shape) * 10.0
+
+                    def mp(input, grad):
+                        grad_op = AveragePoolGrad(
+                            avgpoolshp, ignore_border=ignore_border, mode=mode)
+                        return grad_op(input, grad)
+
+                    utt.verify_grad(mp, [imval, grad_val], rng=rng)
+
    def test_DownsampleFactorMaxGrad_grad_st(self):
        """checks the gradient of the gradient for
        the case that stride is used"""
@@ -443,13 +467,38 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
                        out = DownsampleFactorMax(
                            maxpoolshp, ignore_border=ignore_border,
                            st=stride)(input)
-                        grad_op = DownsampleFactorMaxGrad(
+                        grad_op = MaxPoolGrad(
                            maxpoolshp, ignore_border=ignore_border,
                            st=stride)
                        return grad_op(input, out, grad)

                    utt.verify_grad(mp, [imval, grad_val], rng=rng)

+    def test_AveragePoolGrad_grad_st(self):
+        """checks the gradient of the gradient for
+        the case that stride is used"""
+        rng = numpy.random.RandomState(utt.fetch_seed())
+        avgpoolshps = ((1, 1), (3, 3), (5, 3))
+        stridesizes = ((1, 1), (3, 3), (5, 7))
+        imval = rng.rand(1, 2, 16, 16)
+
+        for avgpoolshp in avgpoolshps:
+            for ignore_border in [True, False]:
+                for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
+                    for stride in stridesizes:
+                        grad_shape = DownsampleFactorMax.out_shape(
+                            imval.shape, avgpoolshp,
+                            ignore_border=ignore_border, st=stride)
+                        grad_val = rng.rand(*grad_shape)
+
+                        def mp(input, grad):
+                            grad_op = AveragePoolGrad(
+                                avgpoolshp, ignore_border=ignore_border,
+                                st=stride, mode=mode)
+                            return grad_op(input, grad)
+
+                        utt.verify_grad(mp, [imval, grad_val], rng=rng)
+
    def test_DownsampleFactorMaxGrad_grad_st_extra(self):
        """checks the gradient of the gradient for the case that
        stride is used for extra examples"""
@@ -475,7 +524,7 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
                    out = DownsampleFactorMax(
                        maxpoolshp, ignore_border=ignore_border,
                        st=stride)(input)
-                    grad_op = DownsampleFactorMaxGrad(
+                    grad_op = MaxPoolGrad(
                        maxpoolshp, ignore_border=ignore_border,
                        st=stride)
                    return grad_op(input, out, grad)
@@ -484,14 +533,47 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
                if numpy.prod(grad_shape) == 0:
                    continue
                utt.verify_grad(mp, [imval, grad_val], rng=rng)
-                
+
+    def test_AveragePoolGrad_grad_st_extra(self):
+        """checks the gradient of the gradient for the case that
+        stride is used for extra examples"""
+        rng = numpy.random.RandomState(utt.fetch_seed())
+        avgpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
+        stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1),
+                       (2, 3), (10, 10), (1, 1))
+        imvsizs = ((16, 16), (16, 16), (16, 16), (8, 5),
+                   (8, 5), (8, 5), (8, 5))
+
+        for indx in numpy.arange(len(avgpoolshps)):
+            imvsize = imvsizs[indx]
+            imval = rng.rand(1, 2, imvsize[0], imvsize[1])
+            stride = stridesizes[indx]
+            avgpoolshp = avgpoolshps[indx]
+            for ignore_border in [True, False]:
+                for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
+                    grad_shape = DownsampleFactorMax.out_shape(
+                        imval.shape, avgpoolshp,
+                        ignore_border=ignore_border, st=stride)
+                    grad_val = rng.rand(*grad_shape)
+
+                    def mp(input, grad):
+                        grad_op = AveragePoolGrad(
+                            avgpoolshp, ignore_border=ignore_border,
+                            st=stride, mode=mode)
+                        return grad_op(input, grad)
+
+                    # skip the grad verification when the output is empty
+                    if numpy.prod(grad_shape) == 0:
+                        continue
+                    utt.verify_grad(mp, [imval, grad_val], rng=rng)
+
    def test_DownsampleFactorMaxPaddingStride_grad_grad(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())        
+        rng = numpy.random.RandomState(utt.fetch_seed())
        imgsizes = ((10, 10), (10, 5), (5, 5))
        maxpoolsizes = ((5, 3), (3, 5), (3, 3))
        stridesizes = ((3, 2), (2, 3), (3, 3))
        paddingsizes = ((2, 2), (2, 1), (2, 2))
-        
+
        for i in range(len(imgsizes)):
            imgsize = imgsizes[i]
            imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0
@@ -509,11 +591,38 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
                    st=stridesize,
                    padding=paddingsize,
                    )(input)
-                grad_op = DownsampleFactorMaxGrad(maxpoolsize, ignore_border=True,
+                grad_op = MaxPoolGrad(maxpoolsize, ignore_border=True,
                                                  st=stridesize, padding=paddingsize)
                return grad_op(input, out, grad)
            utt.verify_grad(mp, [imval, grad_val], rng=rng)
-            
+
+    def test_AveragePoolPaddingStride_grad_grad(self):
+        rng = numpy.random.RandomState(utt.fetch_seed())
+        imgsizes = ((10, 10), (10, 5), (5, 5))
+        avgpoolsizes = ((5, 3), (3, 5), (3, 3))
+        stridesizes = ((3, 2), (2, 3), (3, 3))
+        paddingsizes = ((2, 2), (2, 1), (2, 2))
+
+        for i in range(len(imgsizes)):
+            imgsize = imgsizes[i]
+            imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0
+            avgpoolsize = avgpoolsizes[i]
+            stridesize = stridesizes[i]
+            paddingsize = paddingsizes[i]
+
+            #'average_exc_pad' with non-zero padding is not implemented
+            for mode in ['sum', 'average_inc_pad']:
+                grad_shape = DownsampleFactorMax.out_shape(
+                        imval.shape, avgpoolsize, st=stridesize,
+                    ignore_border=True, padding=paddingsize)
+                grad_val = rng.rand(*grad_shape) * 10.0
+                def mp(input, grad):
+                    grad_op = AveragePoolGrad(avgpoolsize, ignore_border=True,
+                                      st=stridesize, padding=paddingsize,
+                                      mode=mode)
+                    return grad_op(input, grad)
+                utt.verify_grad(mp, [imval, grad_val], rng=rng)
+
    def test_DownsampleFactorMax_hessian(self):
        # Example provided by Frans Cronje, see
        # https://groups.google.com/d/msg/theano-users/qpqUy_3glhw/JMwIvlN5wX4J
@@ -681,18 +790,43 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
                                            padding=padding)(image)],
                                            [image_val], DownsampleFactorMax)

-                    # checking shapes generated by DownsampleFactorMaxGrad
+                    # checking shapes generated by MaxPoolGrad
                    maxout_val = rng.rand(*out_shapes[k][i][j])
                    gz_val = rng.rand(*out_shapes[k][i][j])
                    self._compile_and_check([image, maxout, gz],
-                                            [DownsampleFactorMaxGrad(maxpoolshp,
+                                            [MaxPoolGrad(maxpoolshp,
                                            ignore_border=ignore_border,
                                            padding=padding)
                                            (image, maxout, gz)],
                                            [image_val, maxout_val, gz_val],
-                                            DownsampleFactorMaxGrad,
+                                            MaxPoolGrad,
                                            warn=False)

+    def test_opt_max_to_average(self):
+        im = theano.tensor.tensor4()
+        maxout = theano.tensor.tensor4()
+        grad = theano.tensor.tensor4()
+
+        compilation_mode=theano.compile.get_default_mode().including(
+            'local_average_pool_grad')
+
+        for mode in ['max', 'sum', 'average_inc_pad', 'average_exc_pad']:
+            f = theano.function([im, maxout, grad],
+                    DownsampleFactorMaxGrad(ds=(3,3),
+                                            ignore_border=False,
+                                            mode=mode)(im, maxout, grad),
+                    mode=compilation_mode)
+
+            if mode == 'max':
+                assert any(isinstance(n.op, MaxPoolGrad)
+                    for n in f.maker.fgraph.toposort())
+                assert not any(isinstance(n.op, AveragePoolGrad)
+                    for n in f.maker.fgraph.toposort())
+            else:
+                assert not any(isinstance(n.op, MaxPoolGrad)
+                    for n in f.maker.fgraph.toposort())
+                assert any(isinstance(n.op, AveragePoolGrad)
+                    for n in f.maker.fgraph.toposort())

 if __name__ == '__main__':
    unittest.main()