提交 a6388954 authored 作者: sebastien-j's avatar sebastien-j

Grad of grad (with tests)

上级 85f08bfd
......@@ -624,8 +624,6 @@ class MaxPoolGrad(PoolGrad):
return Apply(self, [x, maxout, gz], [x.type()])
def perform(self, node, inp, out):
if self.mode not in ('max', 'sum') and self.padding != (0, 0):
raise NotImplementedError()
x, maxout, gz = inp
gx_stg, = out
# number of pooling output rows
......@@ -638,8 +636,6 @@ class MaxPoolGrad(PoolGrad):
pad_w = self.padding[1]
img_rows = x.shape[-2] + 2 * pad_h
img_cols = x.shape[-1] + 2 * pad_w
inc_pad = self.mode == 'average_inc_pad'
sum_mode = self.mode == 'sum'
# pad the image
if self.padding != (0, 0):
......@@ -676,8 +672,6 @@ class MaxPoolGrad(PoolGrad):
st=self.st, padding=self.padding)(x, maxout, ggx)]
def c_code(self, node, name, inp, out, sub):
if self.mode != 'max':
raise theano.gof.utils.MethodNotDefined()
x, z, gz = inp
gx, = out
fail = sub['fail']
......@@ -795,7 +789,7 @@ class MaxPoolGrad(PoolGrad):
class AveragePoolGrad(PoolGrad):
def __init__(self, ds, ignore_border, st=None, padding=(0, 0), mode='avg_exc_pad'):
def __init__(self, ds, ignore_border, st=None, padding=(0, 0), mode='average_inc_pad'):
PoolGrad.__init__(self, ds, ignore_border, st, padding, mode)
def make_node(self, x, gz):
......@@ -809,7 +803,7 @@ class AveragePoolGrad(PoolGrad):
return Apply(self, [x, gz], [x.type()])
def perform(self, node, inp, out):
if self.mode not in ('max', 'sum') and self.padding != (0, 0):
if self.mode == 'average_exc_pad' and self.padding != (0, 0):
raise NotImplementedError()
x, gz = inp
gx_stg, = out
......@@ -869,8 +863,9 @@ class AveragePoolGrad(PoolGrad):
x, gz = inp
ggx, = grads
return [theano.tensor.zeros_like(x),
theano.gradient.grad_not_implemented(
self, 2, gz, 'Hessian not implemented with padding')]
DownsampleFactorMax(
self.ds, ignore_border=self.ignore_border,
st=self.st, padding=self.padding, mode=self.mode)(ggx)]
class DownsampleFactorMaxGradGrad(Op):
__props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode')
......@@ -974,7 +969,7 @@ class DownsampleFactorMaxGradGrad(Op):
def make_node(self, x, maxout, gz):
# make_node should only be called by the grad function of
# DownsampleFactorMaxGrad, so these asserts should not fail.
# MaxPoolGrad, so these asserts should not fail.
assert isinstance(x, Variable) and x.ndim == 4
assert isinstance(maxout, Variable) and maxout.ndim == 4
assert isinstance(gz, Variable) and gz.ndim == 4
......@@ -1004,7 +999,7 @@ class DownsampleFactorMaxGradGrad(Op):
ds0, ds1 = self.ds
st0, st1 = self.st
pd0, pd1 = self.padding
img_rows = x.shape[-2] + 2 * pd0
img_rows = x.shape[-2] + 2 * pd0
img_cols = x.shape[-1] + 2 * pd1
# pad the image and its gradients
......@@ -1017,7 +1012,7 @@ class DownsampleFactorMaxGradGrad(Op):
(x.shape[0], x.shape[1], img_rows, img_cols),
dtype=x.dtype)
ggx_padded[:, :, pd0:(img_rows-pd0), pd1:(img_cols-pd1)] = ggx
else:
y_padded = x
ggx_padded = ggx
......@@ -1033,7 +1028,7 @@ class DownsampleFactorMaxGradGrad(Op):
for col_ind in xrange(col_st, col_end):
if (maxout[n, k, r, c] == y_padded[n, k, row_ind, col_ind]):
ggz[n, k, r, c] = ggx_padded[n, k, row_ind, col_ind]
def infer_shape(self, node, in_shapes):
return [in_shapes[0]]
......@@ -1041,7 +1036,7 @@ class DownsampleFactorMaxGradGrad(Op):
if self.mode != 'max':
raise theano.gof.utils.MethodNotDefined()
x, maxout, ggx = inp
z, = out # the grad of grad
z, = out # the grad of grad
fail = sub['fail']
ignore_border = int(self.ignore_border)
ds0, ds1 = self.ds
......@@ -1110,7 +1105,7 @@ class DownsampleFactorMaxGradGrad(Op):
(dtype_%(ggx)s*)(PyArray_GETPTR4(%(ggx)s, b, k, m, n)));
if (a == maximum){
z[0] += ggx[0];
}
}
}
}
}
......@@ -1118,7 +1113,7 @@ class DownsampleFactorMaxGradGrad(Op):
}
}
"""%locals()
def c_code_cache_version(self):
return (0,1)
......@@ -423,6 +423,29 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolGrad_grad(self):
    """Verify the gradient of AveragePoolGrad (i.e. the grad of the
    pooling grad) for every averaging mode and both ignore_border
    settings, using numeric differentiation via utt.verify_grad."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    avgpoolshps = ((1, 1), (3, 2), (2, 3))
    # more variance means numeric gradient will be more accurate
    imval = rng.rand(2, 3, 3, 4) * 10.0

    for avgpoolshp in avgpoolshps:
        for ignore_border in [True, False]:
            for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
                # The shape of the gradient will be the shape of the output
                grad_shape = DownsampleFactorMax.out_shape(
                    imval.shape, avgpoolshp, ignore_border=ignore_border)
                grad_val = rng.rand(*grad_shape) * 10.0

                def mp(input, grad):
                    grad_op = AveragePoolGrad(
                        avgpoolshp, ignore_border=ignore_border, mode=mode)
                    return grad_op(input, grad)

                utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxGrad_grad_st(self):
"""checks the gradient of the gradient for
the case that stride is used"""
......@@ -450,6 +473,31 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolGrad_grad_st(self):
    """Check the gradient of AveragePoolGrad for the case where a
    stride is used, across all averaging modes and border settings."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    pool_shapes = ((1, 1), (3, 3), (5, 3))
    strides = ((1, 1), (3, 3), (5, 7))
    imval = rng.rand(1, 2, 16, 16)

    for pool_shape in pool_shapes:
        for ignore_border in [True, False]:
            for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
                for stride in strides:
                    # Gradient has the shape of the pooled output.
                    out_shape = DownsampleFactorMax.out_shape(
                        imval.shape, pool_shape,
                        ignore_border=ignore_border, st=stride)
                    grad_val = rng.rand(*out_shape)

                    def mp(input, grad):
                        op = AveragePoolGrad(
                            pool_shape, ignore_border=ignore_border,
                            st=stride, mode=mode)
                        return op(input, grad)

                    utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxGrad_grad_st_extra(self):
"""checks the gradient of the gradient for the case that
stride is used for extra examples"""
......@@ -484,14 +532,47 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
if numpy.prod(grad_shape) == 0:
continue
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolGrad_grad_st_extra(self):
    """Check the gradient of AveragePoolGrad with stride on extra
    pool-shape / stride / image-size combinations, including ones
    that produce an empty output (which are skipped)."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    avgpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
    stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1),
                   (2, 3), (10, 10), (1, 1))
    imvsizes = ((16, 16), (16, 16), (16, 16), (8, 5),
                (8, 5), (8, 5), (8, 5))

    # Iterate the parallel tuples together instead of indexing
    # them with numpy.arange(len(...)).
    for avgpoolshp, stride, imvsize in zip(avgpoolshps, stridesizes,
                                           imvsizes):
        imval = rng.rand(1, 2, imvsize[0], imvsize[1])
        for ignore_border in [True, False]:
            for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
                grad_shape = DownsampleFactorMax.out_shape(
                    imval.shape, avgpoolshp,
                    ignore_border=ignore_border, st=stride)
                # skip the grad verification when the output is empty
                if numpy.prod(grad_shape) == 0:
                    continue
                grad_val = rng.rand(*grad_shape)

                def mp(input, grad):
                    grad_op = AveragePoolGrad(
                        avgpoolshp, ignore_border=ignore_border,
                        st=stride, mode=mode)
                    return grad_op(input, grad)

                utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxPaddingStride_grad_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed())
rng = numpy.random.RandomState(utt.fetch_seed())
imgsizes = ((10, 10), (10, 5), (5, 5))
maxpoolsizes = ((5, 3), (3, 5), (3, 3))
stridesizes = ((3, 2), (2, 3), (3, 3))
paddingsizes = ((2, 2), (2, 1), (2, 2))
for i in range(len(imgsizes)):
imgsize = imgsizes[i]
imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0
......@@ -513,7 +594,34 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
st=stridesize, padding=paddingsize)
return grad_op(input, out, grad)
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolPaddingStride_grad_grad(self):
    """Check the gradient of AveragePoolGrad when both padding and a
    stride are used (modes where padding is supported only)."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    cases = zip(((10, 10), (10, 5), (5, 5)),   # image sizes
                ((5, 3), (3, 5), (3, 3)),      # pool sizes
                ((3, 2), (2, 3), (3, 3)),      # strides
                ((2, 2), (2, 1), (2, 2)))      # paddings

    for imgsize, avgpoolsize, stridesize, paddingsize in cases:
        imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0
        # 'average_exc_pad' with non-zero padding is not implemented
        for mode in ['sum', 'average_inc_pad']:
            grad_shape = DownsampleFactorMax.out_shape(
                imval.shape, avgpoolsize, st=stridesize,
                ignore_border=True, padding=paddingsize)
            grad_val = rng.rand(*grad_shape) * 10.0

            def mp(input, grad):
                grad_op = AveragePoolGrad(avgpoolsize, ignore_border=True,
                                          st=stridesize,
                                          padding=paddingsize,
                                          mode=mode)
                return grad_op(input, grad)

            utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMax_hessian(self):
# Example provided by Frans Cronje, see
# https://groups.google.com/d/msg/theano-users/qpqUy_3glhw/JMwIvlN5wX4J
......@@ -681,7 +789,7 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
padding=padding)(image)],
[image_val], DownsampleFactorMax)
# checking shapes generated by DownsampleFactorMaxGrad
# checking shapes generated by MaxPoolGrad
maxout_val = rng.rand(*out_shapes[k][i][j])
gz_val = rng.rand(*out_shapes[k][i][j])
self._compile_and_check([image, maxout, gz],
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论