Turns out we needed support for the gradient of ConvOp in "full" mode! (hint: reconstruction)

a78299b3 · desjagui@atchoum.iro.umontreal.ca · 4867ab0b · a78299b3 · a78299b3
--- a/theano/sandbox/conv.py
+++ b/theano/sandbox/conv.py
@@ -106,32 +106,31 @@ class ConvOp(Op):
        * will crash if filter the same size as input image
        """
-        # TODO: "full" mode should be supported. When in full mode, the hidden
-        # layer is larger than the input image. It therefore cannot be used as
-        # the kernel in the vis * hid convolution.
-        # Two possible solutions: 
-        # - modify convolution code to support kernels of arbitrary shape
-        # - convolve the hidden w/ the visible layer as the kernel, then
-        # DimShuffle. Also verify that this works :)
-        if self.out_mode != 'valid':
-            raise NotImplementedError('Only "valid" mode is currently supported in the gradient')
        ####### Determine gradient on kernels ########
-        mode = self.out_mode
        if inputs.ndim == 3:
            inputs = tensor.shape_padleft(inputs,1)
-        img = tensor.DimShuffle(inputs.broadcastable, (1,0,2,3))(inputs)
+        newin = tensor.DimShuffle(inputs.broadcastable, (1,0,2,3))(inputs)
+        newgz = tensor.DimShuffle(gz.broadcastable, (1,0,2,3))(gz)
+        if self.out_mode == 'valid':
+            (img, filters) = (newin, newgz)
+            (bsize, nkern) = (self.imshp[0], self.nkern)
            imshp = N.hstack((self.bsize, self.imshp[1:]))
-        bsize = self.imshp[0]
+            kshp  = self.outshp[::-1]
+        elif self.out_mode == 'full':
+            (img, filters) = (newgz, newin)
+            (bsize, nkern) = (self.nkern, self.imshp[0])
+            imshp = N.hstack((self.bsize, self.outshp))
+            kshp  = self.imshp[1:][::-1]
+        else:
+            raise NotImplementedError('Only [full,valid] modes are currently supported.')
-        nkern = self.nkern
-        filters = tensor.DimShuffle(gz.broadcastable, (1,0,2,3))(gz)
        filters = filters[:,:,::-1,::-1]
-        kshp  = self.outshp[::-1]
+        dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode='valid')(img,filters)
+        if self.out_mode == 'valid':
-        dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode=mode)(img,filters)
+            # before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1]
            dw = tensor.DimShuffle(dw.broadcastable, (1,0,2,3))(dw)
            dw = dw[:,:,::-1,::-1]

--- a/theano/sandbox/test_conv.py
+++ b/theano/sandbox/test_conv.py
@@ -314,14 +314,12 @@ class TestConvOp(unittest.TestCase):
                print 'visdim = ', visdim
                for kshp in (3,3),:# (6,7):
                    imgvals = N.random.random(N.hstack((bsize,imshp)))
-                    print 'imgvals.shape = ', imgvals.shape
                    imgvals = imgvals.reshape(bsize,-1)
                    if visdim == 1: 
                        kernvals = N.random.rand(nkern,kshp[0],kshp[1])
                    else:
                        kernvals = N.random.rand(nkern,visdim,kshp[0],kshp[1])
-                    print 'kernvals.shape = ', kernvals.shape
                    kernvals = kernvals.reshape(nkern,-1)
                    def testf(imgs, kerns):