Turns out we needed support for the gradient of ConvOp in "full" mode! (hint: reconstruction)

上级 4867ab0b
...@@ -106,32 +106,31 @@ class ConvOp(Op): ...@@ -106,32 +106,31 @@ class ConvOp(Op):
* will crash if filter the same size as input image * will crash if filter the same size as input image
""" """
# TODO: "full" mode should be supported. When in full mode, the hidden
# layer is larger than the input image. It therefore cannot be used as
# the kernel in the vis * hid convolution.
# Two possible solutions:
# - modify convolution code to support kernels of arbitrary shape
# - convolve the hidden w/ the visible layer as the kernel, then
# DimShuffle. Also verify that this works :)
if self.out_mode != 'valid':
raise NotImplementedError('Only "valid" mode is currently supported in the gradient')
####### Determine gradient on kernels ######## ####### Determine gradient on kernels ########
mode = self.out_mode
if inputs.ndim == 3: if inputs.ndim == 3:
inputs = tensor.shape_padleft(inputs,1) inputs = tensor.shape_padleft(inputs,1)
img = tensor.DimShuffle(inputs.broadcastable, (1,0,2,3))(inputs) newin = tensor.DimShuffle(inputs.broadcastable, (1,0,2,3))(inputs)
newgz = tensor.DimShuffle(gz.broadcastable, (1,0,2,3))(gz)
if self.out_mode == 'valid':
(img, filters) = (newin, newgz)
(bsize, nkern) = (self.imshp[0], self.nkern)
imshp = N.hstack((self.bsize, self.imshp[1:])) imshp = N.hstack((self.bsize, self.imshp[1:]))
bsize = self.imshp[0] kshp = self.outshp[::-1]
elif self.out_mode == 'full':
(img, filters) = (newgz, newin)
(bsize, nkern) = (self.nkern, self.imshp[0])
imshp = N.hstack((self.bsize, self.outshp))
kshp = self.imshp[1:][::-1]
else:
raise NotImplementedError('Only [full,valid] modes are currently supported.')
nkern = self.nkern
filters = tensor.DimShuffle(gz.broadcastable, (1,0,2,3))(gz)
filters = filters[:,:,::-1,::-1] filters = filters[:,:,::-1,::-1]
kshp = self.outshp[::-1] dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode='valid')(img,filters)
if self.out_mode == 'valid':
dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode=mode)(img,filters) # before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1]
dw = tensor.DimShuffle(dw.broadcastable, (1,0,2,3))(dw) dw = tensor.DimShuffle(dw.broadcastable, (1,0,2,3))(dw)
dw = dw[:,:,::-1,::-1] dw = dw[:,:,::-1,::-1]
......
...@@ -314,14 +314,12 @@ class TestConvOp(unittest.TestCase): ...@@ -314,14 +314,12 @@ class TestConvOp(unittest.TestCase):
print 'visdim = ', visdim print 'visdim = ', visdim
for kshp in (3,3),:# (6,7): for kshp in (3,3),:# (6,7):
imgvals = N.random.random(N.hstack((bsize,imshp))) imgvals = N.random.random(N.hstack((bsize,imshp)))
print 'imgvals.shape = ', imgvals.shape
imgvals = imgvals.reshape(bsize,-1) imgvals = imgvals.reshape(bsize,-1)
if visdim == 1: if visdim == 1:
kernvals = N.random.rand(nkern,kshp[0],kshp[1]) kernvals = N.random.rand(nkern,kshp[0],kshp[1])
else: else:
kernvals = N.random.rand(nkern,visdim,kshp[0],kshp[1]) kernvals = N.random.rand(nkern,visdim,kshp[0],kshp[1])
print 'kernvals.shape = ', kernvals.shape
kernvals = kernvals.reshape(nkern,-1) kernvals = kernvals.reshape(nkern,-1)
def testf(imgs, kerns): def testf(imgs, kerns):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论