Merge pull request #3845 from f0k/add-cudnn-half-padding

Add border_mode="half" to cuDNN convolutions

Merge pull request #3845 from f0k/add-cudnn-half-padding
45efd371 · Pascal Lamblin · 62d5cbeb · 13f8ea66 · 45efd371 · 45efd371
--- a/theano/sandbox/cuda/dnn.py
+++ b/theano/sandbox/cuda/dnn.py
@@ -259,10 +259,10 @@ class GpuDnnConvDesc(GpuOp):
            assert len(border_mode) == len(subsample)
            border_mode = tuple(map(int, border_mode))
        if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or
-                border_mode in ('valid', 'full')):
+                border_mode in ('valid', 'full', 'half')):
            raise ValueError(
                'invalid border_mode {}, which must be either '
-                '"valid", "full", an integer or a pair of'
+                '"valid", "full", "half", an integer or a pair of'
                ' integers'.format(border_mode))
        self.border_mode = border_mode
        assert len(subsample) in [2, 3]
@@ -292,12 +292,14 @@ class GpuDnnConvDesc(GpuOp):
        if isinstance(self.border_mode, tuple):
            pad_desc = tuple(map(int, self.border_mode))
            assert min(pad_desc) >= 0
-            bmode = 2
+            bmode = 1
        else:
            pad_desc = [0] * nb_dim
            if self.border_mode == "valid":
                bmode = 1
+            elif self.border_mode == "half":
+                bmode = 2
            else:
                assert self.border_mode == "full"
                bmode = 0
@@ -343,6 +345,14 @@ class GpuDnnConvDesc(GpuOp):
        pad[2] = *(npy_int64 *)PyArray_GETPTR1(%(kern_shape)s, 4) - 1;
    }
  }
+  // Adjust padding values if using half convolution
+  else if (%(bmode)d == 2) {
+    pad[0] = *(npy_int64 *)PyArray_GETPTR1(%(kern_shape)s, 2) / 2;
+    pad[1] = *(npy_int64 *)PyArray_GETPTR1(%(kern_shape)s, 3) / 2;
+    if (%(nb_dim)d >= 3) {
+        pad[2] = *(npy_int64 *)PyArray_GETPTR1(%(kern_shape)s, 4) / 2;
+    }
+  }
  err = cudnnSetConvolutionNdDescriptor_v3(
  %(desc)s,
@@ -365,7 +375,7 @@ class GpuDnnConvDesc(GpuOp):
           upscale_str=upscale_str, nb_dim=nb_dim, precision=precision)
    def c_code_cache_version(self):
-        return (3, version())
+        return (4, version())
 # scalar constants
 _zero = constant(numpy.asarray(0.0, dtype='float32'))
@@ -1097,7 +1107,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
    kerns
        Convolution filters.
    border_mode
-        One of 'valid', 'full'; additionally, the padding size can be
+        One of 'valid', 'full', 'half'; additionally, the padding size can be
        directly specified by an integer or a pair of integers (as a tuple),
        specifying the amount of zero padding added to _both_ the top and
        bottom (first entry) and left and right (second entry) sides of
@@ -1210,11 +1220,11 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
    :param img: images to do the convolution over
    :param kerns: convolution filters
-    :param border_mode: One of 'valid', 'full'; additionally, the padding
+    :param border_mode: One of 'valid', 'full', 'half'; additionally, the
-        size can be directly specified by an integer or a pair of integers
+        padding size can be directly specified by an integer or a triplet of
-        (as a tuple), specifying the amount of zero padding added to _both_
+        integers (as a tuple), specifying the amount of zero padding added to
-        the top and bottom (first entry) and left and right (second entry)
+        _both_ the top and bottom (first entry) and left and right (second
-        sides of the image.
+        entry) and front and back (third entry) sides of the volume.
    :param subsample: perform subsampling of the output (default: (1, 1, 1))
    :param conv_mode: perform convolution (kernels flipped) or
        cross-correlation. One of 'conv', 'cross'. (default: 'conv')

--- a/theano/sandbox/cuda/tests/test_abstractconv.py
+++ b/theano/sandbox/cuda/tests/test_abstractconv.py
@@ -37,15 +37,18 @@ class TestConv2d(unittest.TestCase):
        self.filters_shapes = [(5, 1, 2, 2), (4, 1, 3, 3), (2, 1, 3, 3),
                               (1, 1, 2, 5), (4, 1, 2, 2), (4, 5, 2, 2)]
        self.subsamples = [(1, 1), (2, 2), (2, 4)]
-        self.border_modes = ["valid", "full", (0, 0), (1, 1), (5, 5), (5, 2)]
+        self.border_modes = ["valid", "full", "half",
+                             (0, 0), (1, 1), (5, 5), (5, 2)]
        self.filter_flip = [True, False]
    def get_output_shape(self, inputs_shape, filters_shape,
                         subsample, border_mode):
        if border_mode == "valid":
            border_mode = (0, 0)
-        if border_mode == "full":
+        elif border_mode == "full":
            border_mode = (filters_shape[2] - 1, filters_shape[3] - 1)
+        elif border_mode == "half":
+            border_mode = (filters_shape[2] // 2, filters_shape[3] // 2)
        batch_size = inputs_shape[0]
        num_filters = filters_shape[0]
        return (batch_size, num_filters,) \

--- a/theano/sandbox/cuda/tests/test_dnn.py
+++ b/theano/sandbox/cuda/tests/test_dnn.py
@@ -725,7 +725,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
        kerns = T.ftensor4('kerns')
        out = T.ftensor4('out')
        img_val = numpy.asarray(
-            numpy.random.rand(7, 2, 6, 4),
+            numpy.random.rand(10, 2, 6, 4),
            dtype='float32'
        )
        kern_vals = numpy.asarray(
@@ -734,7 +734,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
        )
        for params in product(
-            ['valid', 'full'],
+            ['valid', 'full', 'half'],
            [(1, 1), (2, 2)],
            ['conv', 'cross']
        ):
@@ -764,7 +764,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
        kerns = ftensor5('kerns')
        out = ftensor5('out')
        img_val = numpy.asarray(
-            numpy.random.rand(7, 2, 6, 4, 11),
+            numpy.random.rand(10, 2, 6, 4, 11),
            dtype='float32'
        )
        kern_vals = numpy.asarray(
@@ -773,7 +773,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
        )
        for params in product(
-            ['valid', 'full'],
+            ['valid', 'full', 'half'],
            [(1, 1, 1), (2, 2, 2)],
            ['conv', 'cross']
        ):
@@ -811,7 +811,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
        )
        for params in product(
-            ['valid', 'full'],
+            ['valid', 'full', 'half'],
            [(1, 1)],  # strides besides (1, 1)
            ['conv', 'cross']
        ):
@@ -852,7 +852,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
        kerns = ftensor5('kerns')
        out = ftensor5('out')
        img_val = numpy.asarray(
-            numpy.random.rand(9, 2, 4, 8, 7),
+            numpy.random.rand(9, 2, 4, 8, 13),
            dtype='float32'
        )
        kern_vals = numpy.asarray(
@@ -861,7 +861,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
        )
        for params in product(
-            ['valid', 'full'],
+            ['valid', 'full', 'half'],
            [(1, 1, 1), (2, 2, 2)],
            ['conv', 'cross']
        ):
@@ -942,7 +942,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
        kerns = ftensor5('kerns')
        out = ftensor5('out')
        img_val = numpy.asarray(
-            numpy.random.rand(8, 4, 6, 7, 5),
+            numpy.random.rand(8, 4, 6, 7, 11),
            dtype='float32'
        )
        kern_vals = numpy.asarray(
@@ -951,7 +951,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
        )
        for params in product(
-            ['valid', 'full'],
+            ['valid', 'full', 'half'],
            [(1, 1, 1), (2, 2, 2)],
            ['conv', 'cross']
        ):
@@ -1065,6 +1065,7 @@ def test_dnn_conv_border_mode():
    dnn.dnn_conv(img, kern, border_mode=(2, 3))
    dnn.dnn_conv(img, kern, border_mode='full')
    dnn.dnn_conv(img, kern, border_mode='valid')
+    dnn.dnn_conv(img, kern, border_mode='half')
 def test_dnn_conv_alpha_output_merge():
@@ -1323,7 +1324,7 @@ def get_conv3d_test_cases():
                        [(6, 2, 2, 2, 2), (4, 2, 1, 3, 1), (1, 1, 1)],
                        [(6, 2, 2, 2, 2), (4, 2, 1, 1, 3), (1, 1, 1)],
                        [(6, 2, 2, 2, 2), (4, 2, 5, 5, 5), (1, 1, 1)]]
-    border_modes = ['valid', 'full', (1, 2, 3), (3, 2, 1), 1, 2]
+    border_modes = ['valid', 'full', 'half', (1, 2, 3), (3, 2, 1), 1, 2]
    conv_modes = ['conv', 'cross']
    if cuda.dnn.dnn_available() and dnn.version() >= (3000, 3000):
@@ -1379,6 +1380,8 @@ def test_conv3d_fwd():
        else:
            if border_mode == 'full':
                pad_per_dim = [filters_shape[i] - 1 for i in range(2, 5)]
+            elif border_mode == 'half':
+                pad_per_dim = [filters_shape[i] // 2 for i in range(2, 5)]
            else:
                if isinstance(border_mode, int):
                    pad_per_dim = [border_mode] * 3
@@ -1447,6 +1450,8 @@ def test_conv3d_bwd():
        else:
            if border_mode == 'full':
                pad_per_dim = [filters_shape[i] - 1 for i in range(2, 5)]
+            elif border_mode == 'half':
+                pad_per_dim = [filters_shape[i] // 2 for i in range(2, 5)]
            else:
                if isinstance(border_mode, int):
                    pad_per_dim = [border_mode] * 3

--- a/theano/tests/unittest_tools.py
+++ b/theano/tests/unittest_tools.py
@@ -219,12 +219,12 @@ class InferShapeTester(unittest.TestCase):
                    shp = inp.shape
                if len(set(shp)) != len(shp):
                    _logger.warn(
-                        "While testing the shape inference, we received an"
+                        "While testing shape inference for %r, we received an"
-                        " input with a shape that has some repeated values: %s"
+                        " input with a shape that has some repeated values: %r"
                        ", like a square matrix. This makes it impossible to"
                        " check if the values for these dimensions have been"
                        " correctly used, or if they have been mixed up.",
-                        str(inp.shape))
+                        cls, inp.shape)
                    break
        outputs_function = theano.function(inputs, outputs, mode=mode)