Turns out we needed support for the gradient of ConvOp in "full" mode! (hint: reconstruction)

上级 4867ab0b
......@@ -106,34 +106,33 @@ class ConvOp(Op):
* will crash if filter the same size as input image
"""
# TODO: "full" mode should be supported. When in full mode, the hidden
# layer is larger than the input image. It therefore cannot be used as
# the kernel in the vis * hid convolution.
# Two possible solutions:
# - modify convolution code to support kernels of arbitrary shape
# - convolve the hidden w/ the visible layer as the kernel, then
# DimShuffle. Also verify that this works :)
if self.out_mode != 'valid':
raise NotImplementedError('Only "valid" mode is currently supported in the gradient')
####### Determine gradient on kernels ########
mode = self.out_mode
if inputs.ndim == 3:
inputs = tensor.shape_padleft(inputs,1)
img = tensor.DimShuffle(inputs.broadcastable, (1,0,2,3))(inputs)
imshp = N.hstack((self.bsize, self.imshp[1:]))
bsize = self.imshp[0]
newin = tensor.DimShuffle(inputs.broadcastable, (1,0,2,3))(inputs)
newgz = tensor.DimShuffle(gz.broadcastable, (1,0,2,3))(gz)
if self.out_mode == 'valid':
(img, filters) = (newin, newgz)
(bsize, nkern) = (self.imshp[0], self.nkern)
imshp = N.hstack((self.bsize, self.imshp[1:]))
kshp = self.outshp[::-1]
elif self.out_mode == 'full':
(img, filters) = (newgz, newin)
(bsize, nkern) = (self.nkern, self.imshp[0])
imshp = N.hstack((self.bsize, self.outshp))
kshp = self.imshp[1:][::-1]
else:
raise NotImplementedError('Only [full,valid] modes are currently supported.')
nkern = self.nkern
filters = tensor.DimShuffle(gz.broadcastable, (1,0,2,3))(gz)
filters = filters[:,:,::-1,::-1]
kshp = self.outshp[::-1]
dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode=mode)(img,filters)
dw = tensor.DimShuffle(dw.broadcastable, (1,0,2,3))(dw)
dw = dw[:,:,::-1,::-1]
dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode='valid')(img,filters)
if self.out_mode == 'valid':
# before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1]
dw = tensor.DimShuffle(dw.broadcastable, (1,0,2,3))(dw)
dw = dw[:,:,::-1,::-1]
####### Determine gradient on inputs ########
mode = 'valid' if self.out_mode == 'full' else 'full'
......
......@@ -309,19 +309,17 @@ class TestConvOp(unittest.TestCase):
kerns = T.dmatrix('kerns')
for mode in 'valid', 'full':
for imshp in (5,5),(2,5,5),(2,10,10): # (12,10), (3,12,11):
for imshp in (5,5),(2,5,5),(2,10,10): # (12,10), (3,12,11):
visdim = 1 if len(imshp)!=3 else imshp[0]
print 'visdim = ', visdim
for kshp in (3,3),:# (6,7):
imgvals = N.random.random(N.hstack((bsize,imshp)))
print 'imgvals.shape = ', imgvals.shape
imgvals = imgvals.reshape(bsize,-1)
if visdim == 1:
kernvals = N.random.rand(nkern,kshp[0],kshp[1])
else:
kernvals = N.random.rand(nkern,visdim,kshp[0],kshp[1])
print 'kernvals.shape = ', kernvals.shape
kernvals = kernvals.reshape(nkern,-1)
def testf(imgs, kerns):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论