Merge pull request #3774 from lamblin/abstractconv_fixes

Fix broadcastable pattern of gradient in abstract conv

Merge pull request #3774 from lamblin/abstractconv_fixes
24ace594 · Frédéric Bastien · 458e1594 · 0f5f1890 · 24ace594 · 24ace594
--- a/theano/tensor/nnet/abstract_conv.py
+++ b/theano/tensor/nnet/abstract_conv.py
@@ -5,7 +5,7 @@ Define abstract conv2d interface
 import logging
 import theano
-from theano.tensor import as_tensor_variable
+from theano.tensor import as_tensor_variable, patternbroadcast
 from theano.gof import Apply, Op
@@ -314,6 +314,12 @@ class AbstractConv2d(BaseAbstractConv2d):
                                               self.filter_flip)(
            bottom, top, weights.shape[-2:])
+        # Make sure that the broadcastable pattern of the inputs is used
+        # for the gradients, even if the grad opts are not able to infer
+        # that the dimensions are broadcastable.
+        d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
+        d_weights = patternbroadcast(d_weights, weights.broadcastable)
        return d_bottom, d_weights
@@ -369,6 +375,12 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
                               self.border_mode,
                               self.subsample,
                               self.filter_flip)(bottom, weights)
+        # Make sure that the broadcastable pattern of the inputs is used
+        # for the gradients, even if the grad opts are not able to infer
+        # that the dimensions are broadcastable.
+        d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
+        d_top = patternbroadcast(d_top, top.broadcastable)
        d_height_width = (theano.gradient.DisconnectedType()(),)
        return (d_bottom, d_top) + d_height_width
@@ -425,6 +437,12 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
        d_top = AbstractConv2d(self.imshp, self.kshp,
                               self.border_mode, self.subsample)(
                                   bottom, weights)
+        # Make sure that the broadcastable pattern of the inputs is used
+        # for the gradients, even if the grad opts are not able to infer
+        # that the dimensions are broadcastable.
+        d_weights = patternbroadcast(d_weights, weights.broadcastable)
+        d_top = patternbroadcast(d_top, top.broadcastable)
        d_height_width = (theano.gradient.DisconnectedType()(),)
        return (d_weights, d_top) + d_height_width

--- a/theano/tensor/nnet/tests/test_conv.py
+++ b/theano/tensor/nnet/tests/test_conv.py
@@ -521,6 +521,29 @@ class TestConv2D(utt.InferShapeTester):
                border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
+# Regression test for #3763, built from the example code in that ticket:
+# taking the gradient through the nnet.conv2d() interface must work when
+# the filter has broadcastable dimensions.
+def test_broadcast_grad():
+    """Build the gradient of an nnet.conv2d() output w.r.t. a filter parameter.
+
+    The filter is given broadcastable dimensions through dimshuffle. Only
+    the symbolic graph is constructed; nothing is compiled or evaluated, so
+    the test passes as long as theano.grad() does not raise.
+    """
+    x1 = T.tensor4('x')
+    sigma = T.scalar('sigma')
+    window_radius = 3
+
+    # Normalized 1-D Gaussian window whose width is controlled by `sigma`.
+    filter_1d = T.arange(-window_radius, window_radius + 1)
+    filter_1d = filter_1d.astype(theano.config.floatX)
+    filter_1d = T.exp(-0.5 * filter_1d ** 2 / sigma ** 2)
+    filter_1d = filter_1d / filter_1d.sum()
+
+    # The three 'x' entries add broadcastable axes, turning the window
+    # into a 4-D filter.
+    filter_W = filter_1d.dimshuffle(['x', 'x', 0, 'x'])
+
+    y = theano.tensor.nnet.conv2d(x1, filter_W, border_mode='full',
+                                  filter_shape=[1, 1, None, None])
+    # The result is discarded on purpose: constructing the gradient graph
+    # is the whole check.
+    theano.grad(y.sum(), sigma)
 if __name__ == '__main__':
    t = TestConv2D('setUp')