Turns out we needed support for the gradient of ConvOp in "full" mode! (hint: reconstruction)

上级 4867ab0b
......@@ -106,34 +106,33 @@ class ConvOp(Op):
* will crash if filter the same size as input image
"""
# TODO: "full" mode should be supported. When in full mode, the hidden
# layer is larger than the input image. It therefore cannot be used as
# the kernel in the vis * hid convolution.
# Two possible solutions:
# - modify convolution code to support kernels of arbitrary shape
# - convolve the hidden w/ the visible layer as the kernel, then
# DimShuffle. Also verify that this works :)
if self.out_mode != 'valid':
raise NotImplementedError('Only "valid" mode is currently supported in the gradient')
####### Determine gradient on kernels ########
mode = self.out_mode
if inputs.ndim == 3:
inputs = tensor.shape_padleft(inputs,1)
img = tensor.DimShuffle(inputs.broadcastable, (1,0,2,3))(inputs)
imshp = N.hstack((self.bsize, self.imshp[1:]))
bsize = self.imshp[0]
newin = tensor.DimShuffle(inputs.broadcastable, (1,0,2,3))(inputs)
newgz = tensor.DimShuffle(gz.broadcastable, (1,0,2,3))(gz)
if self.out_mode == 'valid':
(img, filters) = (newin, newgz)
(bsize, nkern) = (self.imshp[0], self.nkern)
imshp = N.hstack((self.bsize, self.imshp[1:]))
kshp = self.outshp[::-1]
elif self.out_mode == 'full':
(img, filters) = (newgz, newin)
(bsize, nkern) = (self.nkern, self.imshp[0])
imshp = N.hstack((self.bsize, self.outshp))
kshp = self.imshp[1:][::-1]
else:
raise NotImplementedError('Only [full,valid] modes are currently supported.')
nkern = self.nkern
filters = tensor.DimShuffle(gz.broadcastable, (1,0,2,3))(gz)
filters = filters[:,:,::-1,::-1]
kshp = self.outshp[::-1]
dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode=mode)(img,filters)
dw = tensor.DimShuffle(dw.broadcastable, (1,0,2,3))(dw)
dw = dw[:,:,::-1,::-1]
dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode='valid')(img,filters)
if self.out_mode == 'valid':
# before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1]
dw = tensor.DimShuffle(dw.broadcastable, (1,0,2,3))(dw)
dw = dw[:,:,::-1,::-1]
####### Determine gradient on inputs ########
mode = 'valid' if self.out_mode == 'full' else 'full'
......
......@@ -309,19 +309,17 @@ class TestConvOp(unittest.TestCase):
kerns = T.dmatrix('kerns')
for mode in 'valid', 'full':
for imshp in (5,5),(2,5,5),(2,10,10): # (12,10), (3,12,11):
for imshp in (5,5),(2,5,5),(2,10,10): # (12,10), (3,12,11):
visdim = 1 if len(imshp)!=3 else imshp[0]
print 'visdim = ', visdim
for kshp in (3,3),:# (6,7):
imgvals = N.random.random(N.hstack((bsize,imshp)))
print 'imgvals.shape = ', imgvals.shape
imgvals = imgvals.reshape(bsize,-1)
if visdim == 1:
kernvals = N.random.rand(nkern,kshp[0],kshp[1])
else:
kernvals = N.random.rand(nkern,visdim,kshp[0],kshp[1])
print 'kernvals.shape = ', kernvals.shape
kernvals = kernvals.reshape(nkern,-1)
def testf(imgs, kerns):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论