Merge pull request #6300 from affanv14/sep3d

3D separable convolutions

Merge pull request #6300 from affanv14/sep3d
c470bd38 · Frédéric Bastien · GitHub · 7befad61 · 89221d0d · c470bd38
--- a/theano/tensor/nnet/abstract_conv.py
+++ b/theano/tensor/nnet/abstract_conv.py
@@ -569,7 +569,7 @@ def separable_conv2d(input,
        Set of filters used depthwise convolution layer of shape
        (depthwise output channels, 1, filter rows, filter columns).

-    depthwise_filters: symbolic 4D tensor
+    pointwise_filters: symbolic 4D tensor
        Set of filters used pointwise convolution layer of shape
        (output channels, depthwise output channels, 1, 1).

@@ -662,6 +662,130 @@ def separable_conv2d(input,
    return pointwise_op


+def separable_conv3d(input,
+                     depthwise_filters,
+                     pointwise_filters,
+                     num_channels,
+                     input_shape=None,
+                     depthwise_filter_shape=None,
+                     pointwise_filter_shape=None,
+                     border_mode='valid',
+                     subsample=(1, 1, 1),
+                     filter_flip=True,
+                     filter_dilation=(1, 1, 1)):
+    """
+    Build the symbolic graph for a 3D separable convolution: depthwise
+    convolutions, which act separately on each input channel, followed by
+    a pointwise convolution, which mixes the channels.
+
+    Parameters
+    ----------
+    input: symbolic 5D tensor
+        Mini-batch of feature map stacks, of shape
+        (batch size, input channels, input depth, input rows, input columns).
+        See the optional parameter ``input_shape``.
+
+    depthwise_filters: symbolic 5D tensor
+        Set of filters used by the depthwise convolution layer, of shape
+        (depthwise output channels, 1, filter depth, filter rows, filter columns).
+
+    pointwise_filters: symbolic 5D tensor
+        Set of filters used by the pointwise convolution layer, of shape
+        (output channels, depthwise output channels, 1, 1, 1).
+
+    num_channels: int
+        The number of channels of the input. Required for depthwise
+        convolutions.
+
+    input_shape: None, tuple/list of len 5 of int or Constant variable
+        The shape of the input parameter.
+        Optional, possibly used to choose an optimal implementation.
+        You can give ``None`` for any element of the list to specify that this
+        element is not known at compile time.
+
+    depthwise_filter_shape: None, tuple/list of len 5 of int or Constant variable
+        The shape of the depthwise filters parameter.
+        Optional, possibly used to choose an optimal implementation.
+        You can give ``None`` for any element of the list to specify that this
+        element is not known at compile time.
+
+    pointwise_filter_shape: None, tuple/list of len 5 of int or Constant variable
+        The shape of the pointwise filters parameter.
+        Optional, possibly used to choose an optimal implementation.
+        You can give ``None`` for any element of the list to specify that this
+        element is not known at compile time.
+
+    border_mode: str, int or tuple of three int
+        This applies only to depthwise convolutions.
+        One of the following:
+
+        ``'valid'``: apply filter wherever it completely overlaps with the
+            input. Generates output of shape: input shape - filter shape + 1
+        ``'full'``: apply filter wherever it partly overlaps with the input.
+            Generates output of shape: input shape + filter shape - 1
+        ``'half'``: pad input with a symmetric border of ``filter // 2``,
+            then perform a valid convolution. For filters with an odd
+            number of slices, rows and columns, this leads to the output
+            shape being equal to the input shape.
+        ``int``: pad input with a symmetric border of zeros of the given
+            width, then perform a valid convolution.
+        ``(int1, int2, int3)``
+            pad input with a symmetric border of ``int1`` slices, ``int2`` rows
+            and ``int3`` columns, then perform a valid convolution.
+
+    subsample: tuple of len 3
+        This applies only to depthwise convolutions.
+        Factor by which to subsample the output.
+        Also called strides elsewhere.
+
+    filter_flip: bool
+        If ``True``, will flip the filter x, y and z dimensions before
+        sliding them over the input. This operation is normally
+        referred to as a convolution, and this is the default. If
+        ``False``, the filters are not flipped and the operation is
+        referred to as a cross-correlation.
+
+    filter_dilation: tuple of len 3
+        This applies only to depthwise convolutions.
+        Factor by which to subsample (stride) the input; also called dilation elsewhere.
+
+    Returns
+    -------
+    Symbolic 5D tensor
+        Set of feature maps generated by the convolutional layer. Tensor is
+        of shape (batch size, output channels, output depth,
+        output rows, output columns).
+
+    """
+
+    input = as_tensor_variable(input)
+    depthwise_filters = as_tensor_variable(depthwise_filters)
+    conv_op = AbstractConv3d(imshp=input_shape,
+                             kshp=depthwise_filter_shape,
+                             border_mode=border_mode,
+                             subsample=subsample,
+                             filter_flip=filter_flip,
+                             filter_dilation=filter_dilation,
+                             num_groups=num_channels)
+
+    if input_shape is None or depthwise_filter_shape is None:
+        depthwise_op_shape = None
+    else:
+        depthwise_op_shape = conv_op.infer_shape(None, [input_shape, depthwise_filter_shape])[0]
+    depthwise_op = conv_op(input, depthwise_filters)
+
+    pointwise_op = conv3d(input=depthwise_op,
+                          filters=pointwise_filters,
+                          input_shape=depthwise_op_shape,
+                          filter_shape=pointwise_filter_shape,
+                          border_mode='valid',
+                          subsample=(1, 1, 1),
+                          filter_flip=filter_flip,
+                          filter_dilation=(1, 1, 1),
+                          num_groups=1)
+    return pointwise_op
+
+
 def conv3d(input,
           filters,
           input_shape=None,

--- a/theano/tensor/nnet/tests/test_abstract_conv.py
+++ b/theano/tensor/nnet/tests/test_abstract_conv.py
@@ -23,7 +23,7 @@ from theano.tensor.nnet.abstract_conv import AbstractConv2d_gradWeights
 from theano.tensor.nnet.abstract_conv import bilinear_kernel_1D
 from theano.tensor.nnet.abstract_conv import bilinear_kernel_2D
 from theano.tensor.nnet.abstract_conv import bilinear_upsampling
-from theano.tensor.nnet.abstract_conv import separable_conv2d
+from theano.tensor.nnet.abstract_conv import separable_conv2d, separable_conv3d
 from theano.tensor.nnet.corr import (CorrMM, CorrMM_gradWeights,
                                     CorrMM_gradInputs)
 from theano.tensor.nnet.corr3d import (Corr3dMM, Corr3dMM_gradWeights,
@@ -1652,35 +1652,96 @@ class Grouped_conv3d_noOptim(Grouped_conv_noOptim):


 class Separable_conv(unittest.TestCase):
+    def setUp(self):
+        self.x = np.array([[[[1, 2, 3, 4, 5], [3, 2, 1, 4, 5], [3, 3, 1, 3, 6], [5, 3, 2, 1, 1], [4, 7, 1, 2, 1]],
+                            [[3, 3, 1, 2, 6], [6, 5, 4, 3, 1], [3, 4, 5, 2, 3], [6, 4, 1, 3, 4], [2, 3, 4, 2, 5]]]]).astype(theano.config.floatX)

-    def test_interface(self):
-        x = np.array([[[[1, 2, 3, 4, 5], [3, 2, 1, 4, 5], [3, 3, 1, 3, 6], [5, 3, 2, 1, 1], [4, 7, 1, 2, 1]],
-                       [[3, 3, 1, 2, 6], [6, 5, 4, 3, 1], [3, 4, 5, 2, 3], [6, 4, 1, 3, 4], [2, 3, 4, 2, 5]]]]).astype(theano.config.floatX)
+        self.depthwise_filter = np.array([[[[3, 2, 1], [5, 3, 2], [6, 4, 2]]], [[[5, 5, 2], [3, 7, 4], [3, 5, 4]]],
+                                          [[[7, 4, 7], [5, 3, 3], [1, 3, 1]]], [[[4, 4, 4], [2, 4, 6], [0, 0, 7]]]]).astype(theano.config.floatX)
+
+        self.pointwise_filter = np.array([[[[4]], [[1]], [[3]], [[5]]], [[[2]], [[1]], [[2]], [[8]]]]).astype(theano.config.floatX)

-        depthwise_filter = np.array([[[[3, 2, 1], [5, 3, 2], [6, 4, 2]]], [[[5, 5, 2], [3, 7, 4], [3, 5, 4]]],
-                                     [[[7, 4, 7], [5, 3, 3], [1, 3, 1]]], [[[4, 4, 4], [2, 4, 6], [0, 0, 7]]]]).astype(theano.config.floatX)
+        self.precomp_output_valid = np.array([[[[1385, 1333, 1339], [1382, 1243, 1291], [1303, 1120, 1228]],
+                                               [[1532, 1410, 1259], [1522, 1346, 1314], [1379, 1192, 1286]]]]).astype(theano.config.floatX)

-        pointwise_filter = np.array([[[[4]], [[1]], [[3]], [[5]]], [[[2]], [[1]], [[2]], [[8]]]]).astype(theano.config.floatX)
-        precomp_output = np.array([[[[1385, 1333, 1339], [1382, 1243, 1291], [1303, 1120, 1228]],
-                                  [[1532, 1410, 1259], [1522, 1346, 1314], [1379, 1192, 1286]]]]).astype(theano.config.floatX)
+        self.precomp_output_full = np.array([[[[140, 266, 343, 206, 59],
+                                              [395, 697, 979, 585, 245],
+                                              [429, 863, 1385, 919, 453],
+                                              [243, 499, 864, 627, 371],
+                                              [90, 183, 291, 254, 202]],

+                                             [[149, 289, 359, 213, 58],
+                                              [400, 750, 1076, 662, 266],
+                                              [387, 854, 1532, 1091, 540],
+                                              [174, 411, 971, 786, 518],
+                                              [51, 110, 286, 299, 298]]]]).astype(theano.config.floatX)
+
+    def test_interface2d(self):
        x_sym = theano.tensor.tensor4('x')
        dfilter_sym = theano.tensor.tensor4('d')
        pfilter_sym = theano.tensor.tensor4('p')

-        sep_op = separable_conv2d(x_sym, dfilter_sym, pfilter_sym, x.shape[1])
+        sep_op = separable_conv2d(x_sym, dfilter_sym, pfilter_sym, self.x.shape[1])
        fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN')

        # test for square matrix
-        top = fun(x, depthwise_filter, pointwise_filter)
-        utt.assert_allclose(top, precomp_output)
+        top = fun(self.x, self.depthwise_filter, self.pointwise_filter)
+        utt.assert_allclose(top, self.precomp_output_valid)

        # test for non-square matrix
-        top = fun(x[:, :, :3, :], depthwise_filter, pointwise_filter)
-        utt.assert_allclose(top, precomp_output[:, :, :1, :])
+        top = fun(self.x[:, :, :3, :], self.depthwise_filter, self.pointwise_filter)
+        utt.assert_allclose(top, self.precomp_output_valid[:, :, :1, :])

        # test if it infers shape
        sep_op = separable_conv2d(x_sym,
+                                  dfilter_sym,
+                                  pfilter_sym,
+                                  self.x.shape[1],
+                                  input_shape=self.x.shape,
+                                  depthwise_filter_shape=self.depthwise_filter.shape,
+                                  pointwise_filter_shape=self.pointwise_filter.shape)
+        fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN')
+        top = fun(self.x, self.depthwise_filter, self.pointwise_filter)
+        utt.assert_allclose(top, self.precomp_output_valid)
+
+        # test non-default subsample
+        sep_op = separable_conv2d(x_sym,
+                                  dfilter_sym,
+                                  pfilter_sym,
+                                  self.x.shape[1],
+                                  subsample=(2, 2))
+        fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN')
+        top = fun(self.x, self.depthwise_filter, self.pointwise_filter)
+        utt.assert_allclose(top, np.delete(np.delete(self.precomp_output_valid, 1, axis=3), 1, axis=2))
+
+        # test non-default border_mode
+        sep_op = separable_conv2d(x_sym, dfilter_sym, pfilter_sym, self.x.shape[1], border_mode='full')
+        fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN')
+        top = fun(self.x[:, :, :3, :3], self.depthwise_filter, self.pointwise_filter)
+        utt.assert_allclose(top, self.precomp_output_full)
+
+    def test_interface3d(self):
+        # Expand the input, filters and expected output along the depth axis
+        x = np.tile(np.expand_dims(self.x, axis=2), (1, 1, 5, 1, 1))
+        depthwise_filter = np.tile(np.expand_dims(self.depthwise_filter, axis=2), (1, 1, 3, 1, 1))
+        pointwise_filter = np.expand_dims(self.pointwise_filter, axis=2)
+        precomp_output = np.tile(np.expand_dims(self.precomp_output_valid, axis=2), (1, 1, 3, 1, 1)) * 3
+
+        x_sym = theano.tensor.tensor5('x')
+        dfilter_sym = theano.tensor.tensor5('d')
+        pfilter_sym = theano.tensor.tensor5('p')
+
+        sep_op = separable_conv3d(x_sym, dfilter_sym, pfilter_sym, x.shape[1])
+        fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN')
+
+        # test for square matrix
+        top = fun(x, depthwise_filter, pointwise_filter)
+        utt.assert_allclose(top, precomp_output)
+        # test for non-square matrix
+        top = fun(x[:, :, :3, :, :3], depthwise_filter, pointwise_filter)
+        utt.assert_allclose(top, precomp_output[:, :, :1, :, :1])
+        # test if it infers shape
+        sep_op = separable_conv3d(x_sym,
                                  dfilter_sym,
                                  pfilter_sym,
                                  x.shape[1],
@@ -1692,29 +1753,20 @@ class Separable_conv(unittest.TestCase):
        utt.assert_allclose(top, precomp_output)

        # test non-default subsample
-        sep_op = separable_conv2d(x_sym,
+        sep_op = separable_conv3d(x_sym,
                                  dfilter_sym,
                                  pfilter_sym,
                                  x.shape[1],
-                                  subsample=(2, 2))
+                                  subsample=(2, 2, 2))
        fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN')
        top = fun(x, depthwise_filter, pointwise_filter)
-        utt.assert_allclose(top, np.delete(np.delete(precomp_output, 1, axis=3), 1, axis=2))
-
+        utt.assert_allclose(top, np.delete(np.delete(
+            np.delete(precomp_output, 1, axis=4), 1, axis=3), 1, axis=2))
        # test non-default border_mode
-        precomp_output = np.array([[[[140, 266, 343, 206, 59],
-                                     [395, 697, 979, 585, 245],
-                                     [429, 863, 1385, 919, 453],
-                                     [243, 499, 864, 627, 371],
-                                     [90, 183, 291, 254, 202]],
-
-                                    [[149, 289, 359, 213, 58],
-                                     [400, 750, 1076, 662, 266],
-                                     [387, 854, 1532, 1091, 540],
-                                     [174, 411, 971, 786, 518],
-                                     [51, 110, 286, 299, 298]]]]).astype(theano.config.floatX)
-
-        sep_op = separable_conv2d(x_sym, dfilter_sym, pfilter_sym, x.shape[1], border_mode='full')
+        precomp_output = np.tile(np.expand_dims(self.precomp_output_full, axis=2),
+                                 (1, 1, 5, 1, 1)) * np.array([[[[[1]], [[2]], [[3]], [[2]], [[1]]]]])
+
+        sep_op = separable_conv3d(x_sym, dfilter_sym, pfilter_sym, x.shape[1], border_mode='full')
        fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN')
-        top = fun(x[:, :, :3, :3], depthwise_filter, pointwise_filter)
+        top = fun(x[:, :, :3, :3, :3], depthwise_filter, pointwise_filter)
        utt.assert_allclose(top, precomp_output)