提交 24ace594 · 作者:Frédéric Bastien

Merge pull request #3774 from lamblin/abstractconv_fixes

Fix broadcastable pattern of gradient in abstract conv
......@@ -5,7 +5,7 @@ Define abstract conv2d interface
import logging
import theano
from theano.tensor import as_tensor_variable
from theano.tensor import as_tensor_variable, patternbroadcast
from theano.gof import Apply, Op
......@@ -314,6 +314,12 @@ class AbstractConv2d(BaseAbstractConv2d):
self.filter_flip)(
bottom, top, weights.shape[-2:])
# Make sure that the broadcastable pattern of the inputs is used
# for the gradients, even if the grad opts are not able to infer
# that the dimensions are broadcastable.
d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
d_weights = patternbroadcast(d_weights, weights.broadcastable)
return d_bottom, d_weights
......@@ -369,6 +375,12 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
self.border_mode,
self.subsample,
self.filter_flip)(bottom, weights)
# Make sure that the broadcastable pattern of the inputs is used
# for the gradients, even if the grad opts are not able to infer
# that the dimensions are broadcastable.
d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
d_top = patternbroadcast(d_top, top.broadcastable)
d_height_width = (theano.gradient.DisconnectedType()(),)
return (d_bottom, d_top) + d_height_width
......@@ -425,6 +437,12 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
d_top = AbstractConv2d(self.imshp, self.kshp,
self.border_mode, self.subsample)(
bottom, weights)
# Make sure that the broadcastable pattern of the inputs is used
# for the gradients, even if the grad opts are not able to infer
# that the dimensions are broadcastable.
d_weights = patternbroadcast(d_weights, weights.broadcastable)
d_top = patternbroadcast(d_top, top.broadcastable)
d_height_width = (theano.gradient.DisconnectedType()(),)
return (d_weights, d_top) + d_height_width
......
......@@ -521,6 +521,29 @@ class TestConv2D(utt.InferShapeTester):
border_mode='full')], [adtens_val, bdtens_val], conv.ConvOp)
# Test that broadcasting of gradients works correctly when using the
# nnet.conv2d() interface. This was reported in #3763, and uses the example
# code from that ticket.
def test_broadcast_grad():
    """Regression test for ticket #3763.

    Taking the gradient through ``nnet.conv2d`` must not fail when the
    filter has broadcastable dimensions: here the filter is built from a
    1-d Gaussian kernel via ``dimshuffle(['x', 'x', 0, 'x'])``, so its
    first, second and last axes are broadcastable.

    Only graph construction matters — ``theano.grad`` used to raise
    because the broadcastable pattern of the gradient produced by the
    abstract conv ops did not match the pattern of the inputs.
    """
    x1 = T.tensor4('x')
    sigma = T.scalar('sigma')
    window_radius = 3

    # Build a 1-d Gaussian kernel parametrized by the symbolic ``sigma``.
    filter_1d = T.arange(-window_radius, window_radius + 1)
    filter_1d = filter_1d.astype(theano.config.floatX)
    filter_1d = T.exp(-0.5 * filter_1d ** 2 / sigma ** 2)
    filter_1d = filter_1d / filter_1d.sum()

    # Reshape to a 4-d filter of shape (1, 1, width, 1); the 'x' axes
    # are the broadcastable dimensions that triggered the original bug.
    filter_W = filter_1d.dimshuffle(['x', 'x', 0, 'x'])

    y = theano.tensor.nnet.conv2d(x1, filter_W, border_mode='full',
                                  filter_shape=[1, 1, None, None])
    # Must not raise; the numeric value of the gradient is irrelevant,
    # so no function is compiled and no data is fed in.
    theano.grad(y.sum(), sigma)
if __name__ == '__main__':
t = TestConv2D('setUp')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论