Enable border_mode != valid and filter_dilation in GpuCorr3dMM.

This reuses the implementation of GpuCorr2dMM and its gradient ops.

Enable border_mode != valid and filter_dilation in GpuCorr3dMM.
1d2411c6 · Gijs van Tulder · 1d9aff8a · 1d2411c6 · 1d2411c6 · 1d2411c6
--- a/theano/sandbox/cuda/blas.py
+++ b/theano/sandbox/cuda/blas.py
--- a/theano/sandbox/cuda/corr3d_gemm.cu
+++ b/theano/sandbox/cuda/corr3d_gemm.cu
--- a/theano/sandbox/cuda/tests/test_gemmcorr3d.py
+++ b/theano/sandbox/cuda/tests/test_gemmcorr3d.py
 from __future__ import absolute_import, print_function, division
 import unittest
 import numpy
+from six.moves import xrange
+try:
+    from scipy import ndimage
+except ImportError:
+    ndimage = None
 import theano
 from theano.tests import unittest_tools as utt
@@ -21,31 +26,127 @@ else:
    mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
+# python reference implementation of a 3D convolution
+# see also: theano.tensor.nnet.tests.test_conv3d2d
+# expects: (batch, 0, channels, 1, 2)
+def pyconv3d(signals, filters, border_mode='valid', dilation=(1, 1, 1)):
+    # Reference 3D convolution built on scipy.ndimage.convolve.
+    #
+    # signals and filters are 5D arrays whose shapes are unpacked as
+    # (N, T, C, H, W); the channel count C is taken from filters.
+    # border_mode is 'full', 'half' or a tuple of three paddings (T, H, W);
+    # any other value (including 'valid') means no padding.
+    # dilation holds the filter dilation factors for T, H and W.
+    # Returns an array of shape (Ns, T_out, Nf, H_out, W_out) in which the
+    # per-channel results have been summed.
+    Ns, Ts, C, Hs, Ws = signals.shape
+    Nf, Tf, C, Hf, Wf = filters.shape
+    Tdil, Hdil, Wdil = dilation
+    # extent of each filter dimension once it has been dilated
+    Tfdil = (Tf - 1) * Tdil + 1
+    Hfdil = (Hf - 1) * Hdil + 1
+    Wfdil = (Wf - 1) * Wdil + 1
+    # if border_mode is not 'valid', the signals need zero-padding
+    if border_mode == 'full':
+        Tpad = Tfdil - 1
+        Hpad = Hfdil - 1
+        Wpad = Wfdil - 1
+    elif border_mode == 'half':
+        Tpad = Tfdil // 2
+        Hpad = Hfdil // 2
+        Wpad = Wfdil // 2
+    elif isinstance(border_mode, tuple):
+        Tpad, Hpad, Wpad = map(int, border_mode)
+    else:
+        Tpad = 0
+        Hpad = 0
+        Wpad = 0
+    if Tpad > 0 or Hpad > 0 or Wpad > 0:
+        # zero-pad signals
+        signals_padded = numpy.zeros((Ns, Ts + 2 * Tpad, C,
+                                      Hs + 2 * Hpad, Ws + 2 * Wpad), 'float32')
+        signals_padded[:, Tpad:(Ts + Tpad), :, Hpad:(Hs + Hpad),
+                       Wpad:(Ws + Wpad)] = signals
+        # from here on the sizes refer to the padded signals
+        Ns, Ts, C, Hs, Ws = signals_padded.shape
+        signals = signals_padded
+    # offsets used further down to crop the 'same' output to 'valid'
+    Tfdil2 = Tfdil // 2
+    Hfdil2 = Hfdil // 2
+    Wfdil2 = Wfdil // 2
+    # build the dilated filters: the original taps are written at strides
+    # of the dilation factor, all other positions stay zero
+    dilated_filters = numpy.zeros((Nf, Tfdil, C, Hfdil, Wfdil), dtype=filters.dtype)
+    dilated_filters[:, ::Tdil, :, ::Hdil, ::Wdil] = filters
+    # perform valid convolution on the padded signals
+    rval = numpy.zeros((Ns, Ts - Tfdil + 1, Nf, Hs - Hfdil + 1, Ws - Wfdil + 1))
+    for ns in xrange(Ns):
+        for nf in xrange(Nf):
+            for c in xrange(C):
+                s_i = signals[ns, :, c, :, :]
+                f_i = dilated_filters[nf, :, c, :, :]
+                # r_i is a slice of rval, so the in-place addition at the
+                # end of the loop body accumulates into rval over channels
+                r_i = rval[ns, :, nf, :, :]
+                # scipy.signal.convolve performs valid convolution,
+                # but is quite slow. scipy.ndimage.convolve is faster,
+                # but only supports 'same' convolution.
+                # origin must be -1 for even filters, 0 for odd filters
+                # NOTE(review): cval=1 presumably only influences border
+                # positions that are removed by the crop below -- confirm
+                o_i = ndimage.convolve(s_i, f_i, mode='constant', cval=1,
+                                       origin=(f_i.shape[0] % 2 - 1,
+                                               f_i.shape[1] % 2 - 1,
+                                               f_i.shape[2] % 2 - 1))
+                # crop to get the result of 'valid' convolution
+                o_i = o_i[Tfdil2:(r_i.shape[0] + Tfdil2),
+                          Hfdil2:(r_i.shape[1] + Hfdil2),
+                          Wfdil2:(r_i.shape[2] + Wfdil2)]
+                # the result should be equal to 'valid' convolution
+                # utt.assert_allclose(o_i, signal.convolve(s_i, f_i, mode='valid'))
+                r_i += o_i
+    return rval
 class TestCorr3DMM(unittest.TestCase):
    def run_conv_valid(self, inputs_shape, filters_shape,
-                       subsample=(1, 1, 1)):
+                       border_mode='valid',
+                       filter_dilation=(1, 1, 1),
+                       subsample=(1, 1, 1),
+                       verify_grad=False):
+        # Run GpuCorr3dMM on random float32 inputs and filters and compare
+        # the output with a reference result. Despite its name, this helper
+        # also covers border modes other than 'valid'.
+        #
+        # inputs_shape / filters_shape: shapes of the random test arrays
+        #     (the comments below describe the layout as b012c).
+        # border_mode, filter_dilation, subsample: passed on to GpuCorr3dMM.
+        # verify_grad: if True, also run utt.verify_grad on the op.
+        # Raises SkipTest when no reference implementation is available
+        # for the requested combination of parameters.
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')
        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))
-        conv_ref = theano.tensor.nnet.conv3D(V=inputs, W=filters,
-                                             b=bias, d=subsample)
+        # conv3D is only used as the reference when there is no dilation
+        # and no padding; it is the only branch that handles subsampling
+        if filter_dilation == (1, 1, 1) and border_mode in ('valid', (0, 0, 0)):
-        conv = GpuCorr3dMM(border_mode="valid",
+            conv_ref = theano.tensor.nnet.conv3D(V=inputs, W=filters,
+                                                 b=bias, d=subsample)
+            f_ref = theano.function([], conv_ref)
+            res_ref = f_ref()
+        elif subsample == (1, 1, 1):
+            if ndimage is None:
+                raise SkipTest('This test needs SciPy.')
+            # input = b012c
+            # pyconv3d wants = b0c12 = (0, 1, 4, 2, 3)
+            # pyconv3d outputs = b0c12 = (0, 1, 3, 4, 2)
+            # the filters are reversed along the three non-channel axes;
+            # presumably because pyconv3d computes a convolution while
+            # GpuCorr3dMM computes a correlation -- TODO confirm
+            res_ref = pyconv3d(signals=inputs_val.transpose(0, 1, 4, 2, 3),
+                               filters=filters_val.transpose(0, 1, 4, 2, 3)[:, ::-1, :, ::-1, ::-1],
+                               dilation=filter_dilation,
+                               border_mode=border_mode).transpose(0, 1, 3, 4, 2)
+        else:
+            raise SkipTest('No reference implementation that combines '
+                           'border_mode and subsampling.')
+        # the op is applied to the inputs with the last axis moved to
+        # position 1; its output is shuffled back before the comparison
+        conv = GpuCorr3dMM(border_mode=border_mode,
+                           filter_dilation=filter_dilation,
                           subsample=subsample)(
                               inputs.dimshuffle(0, 4, 1, 2, 3),
                               filters.dimshuffle(0, 4, 1, 2, 3))
        conv = conv.dimshuffle(0, 2, 3, 4, 1)
-        f_ref = theano.function([], conv_ref)
        f = theano.function([], conv, mode=mode_with_gpu)
-        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
+        if verify_grad:
+            # the gradient check uses the same axis order as the op call above
+            utt.verify_grad(GpuCorr3dMM(border_mode=border_mode,
+                                        filter_dilation=filter_dilation,
+                                        subsample=subsample),
+                            [inputs_val.transpose(0, 4, 1, 2, 3),
+                             filters_val.transpose(0, 4, 1, 2, 3)])
    def test_valid(self):
        self.run_conv_valid(inputs_shape=(16, 20, 12, 16, 1),
                            filters_shape=(10, 6, 12, 4, 1))
@@ -68,6 +169,50 @@ class TestCorr3DMM(unittest.TestCase):
                            filters_shape=(10, 6, 12, 4, 1),
                            subsample=(1, 2, 3))
+    def test_border_mode(self):
+        # Check GpuCorr3dMM against the reference for the string border
+        # modes and for explicit padding tuples, with the default
+        # subsampling and filter dilation.
+        self.run_conv_valid(inputs_shape=(16, 20, 12, 15, 1),
+                            filters_shape=(10, 6, 12, 4, 1),
+                            border_mode='valid')
+        self.run_conv_valid(inputs_shape=(16, 20, 12, 15, 1),
+                            filters_shape=(10, 6, 12, 4, 1),
+                            border_mode='half')
+        self.run_conv_valid(inputs_shape=(16, 20, 12, 15, 1),
+                            filters_shape=(10, 6, 12, 4, 1),
+                            border_mode='full')
+        # explicit padding: zero padding and two asymmetric settings
+        self.run_conv_valid(inputs_shape=(16, 20, 12, 15, 1),
+                            filters_shape=(10, 6, 12, 4, 1),
+                            border_mode=(0, 0, 0))
+        self.run_conv_valid(inputs_shape=(16, 20, 12, 15, 1),
+                            filters_shape=(10, 6, 12, 4, 1),
+                            border_mode=(1, 2, 3))
+        self.run_conv_valid(inputs_shape=(16, 20, 12, 15, 1),
+                            filters_shape=(10, 6, 12, 4, 1),
+                            border_mode=(3, 2, 1))
+    def test_filter_dilation(self):
+        # Dilate one filter dimension at a time and check the result for
+        # each of the string border modes. Because the dilation is never
+        # (1, 1, 1), run_conv_valid takes its pyconv3d reference branch.
+        inputs_shape = [16, 20, 12, 15, 1]
+        filters_shape = [10, 6, 5, 4, 1]
+        for filter_dilation in [(2, 1, 1), (1, 2, 1), (1, 1, 2)]:
+            for border_mode in ['valid', 'half', 'full']:
+                self.run_conv_valid(inputs_shape=inputs_shape,
+                                    filters_shape=filters_shape,
+                                    filter_dilation=filter_dilation,
+                                    border_mode=border_mode)
+    def test_verify_gradients(self):
+        # Same comparison as in test_filter_dilation, plus one explicit
+        # padding tuple, with verify_grad=True so that run_conv_valid also
+        # calls utt.verify_grad on the op.
+        # use a small example to check the gradients
+        inputs_shape = [2, 7, 9, 6, 1]
+        filters_shape = [1, 3, 3, 2, 1]
+        for filter_dilation in [(2, 1, 1), (1, 2, 1), (1, 1, 2)]:
+            for border_mode in ['valid', 'half', 'full', (2, 1, 3)]:
+                self.run_conv_valid(inputs_shape=inputs_shape,
+                                    filters_shape=filters_shape,
+                                    filter_dilation=filter_dilation,
+                                    border_mode=border_mode,
+                                    verify_grad=True)
    def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape,
                       subsample=(1, 1, 1)):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')