Merge pull request #6092 from affanv14/separable

Separable convolutions

Merge pull request #6092 from affanv14/separable
e1911566 · abergeron · GitHub · 03cc25ea · c98bc838 · e1911566
--- a/theano/tensor/nnet/__init__.py
+++ b/theano/tensor/nnet/__init__.py
@@ -35,6 +35,7 @@ import warnings
 from .abstract_conv import conv2d as abstract_conv2d
 from .abstract_conv import conv2d_grad_wrt_inputs
 from .abstract_conv import conv3d
+from .abstract_conv import separable_conv2d
 def conv2d(input, filters, input_shape=None, filter_shape=None,

--- a/theano/tensor/nnet/abstract_conv.py
+++ b/theano/tensor/nnet/abstract_conv.py
@@ -544,6 +544,126 @@ def conv2d(input,
    return conv_op(input, filters)
+def separable_conv2d(input,
+                     depthwise_filters,
+                     pointwise_filters,
+                     num_channels,
+                     input_shape=None,
+                     depthwise_filter_shape=None,
+                     pointwise_filter_shape=None,
+                     border_mode='valid',
+                     subsample=(1, 1),
+                     filter_flip=True,
+                     filter_dilation=(1, 1)):
+    """
+    This function will build the symbolic graph for depthwise
+    convolutions which act separately on the input channels followed by
+    pointwise convolution which mixes channels.
+
+    Parameters
+    ----------
+    input: symbolic 4D tensor
+        Mini-batch of feature map stacks, of shape
+        (batch size, input channels, input rows, input columns).
+        See the optional parameter ``input_shape``.
+    depthwise_filters: symbolic 4D tensor
+        Set of filters used by the depthwise convolution layer, of shape
+        (depthwise output channels, 1, filter rows, filter columns).
+    pointwise_filters: symbolic 4D tensor
+        Set of filters used by the pointwise convolution layer, of shape
+        (output channels, depthwise output channels, 1, 1).
+    num_channels: int
+        The number of channels of the input. Required for depthwise
+        convolutions.
+    input_shape: None, tuple/list of len 4 of int or Constant variable
+        The shape of the input parameter.
+        Optional, possibly used to choose an optimal implementation.
+        You can give ``None`` for any element of the list to specify that this
+        element is not known at compile time.
+    depthwise_filter_shape: None, tuple/list of len 4 of int or Constant variable
+        The shape of the depthwise filters parameter.
+        Optional, possibly used to choose an optimal implementation.
+        You can give ``None`` for any element of the list to specify that this
+        element is not known at compile time.
+    pointwise_filter_shape: None, tuple/list of len 4 of int or Constant variable
+        The shape of the pointwise filters parameter.
+        Optional, possibly used to choose an optimal implementation.
+        You can give ``None`` for any element of the list to specify that this
+        element is not known at compile time.
+    border_mode: str, int or tuple of two int
+        This applies only to the depthwise convolution; the pointwise
+        convolution always uses ``'valid'``.
+        Either of the following:
+        ``'valid'``: apply filter wherever it completely overlaps with the
+            input. Generates output of shape: input shape - filter shape + 1
+        ``'full'``: apply filter wherever it partly overlaps with the input.
+            Generates output of shape: input shape + filter shape - 1
+        ``'half'``: pad input with a symmetric border of ``filter rows // 2``
+            rows and ``filter columns // 2`` columns, then perform a valid
+            convolution. For filters with an odd number of rows and columns, this
+            leads to the output shape being equal to the input shape.
+        ``int``: pad input with a symmetric border of zeros of the given
+            width, then perform a valid convolution.
+        ``(int1, int2)``: pad input with a symmetric border of ``int1`` rows
+            and ``int2`` columns, then perform a valid convolution.
+    subsample: tuple of len 2
+        Factor by which to subsample the output.
+        This applies only to the depthwise convolution; the pointwise
+        convolution always uses ``(1, 1)``.
+    filter_flip: bool
+        If ``True``, will flip the filter rows and columns
+        before sliding them over the input. This operation is normally referred
+        to as a convolution, and this is the default. If ``False``, the filters
+        are not flipped and the operation is referred to as a cross-correlation.
+        This is applied to both the depthwise and the pointwise convolution.
+    filter_dilation: tuple of len 2
+        Factor by which to subsample (stride) the input.
+        This applies only to the depthwise convolution; the pointwise
+        convolution always uses ``(1, 1)``.
+
+    Returns
+    -------
+    Symbolic 4D tensor
+        Set of feature maps generated by convolutional layer. Tensor is
+        of shape (batch size, output channels, output rows, output columns)
+    """
+    input = as_tensor_variable(input)
+    depthwise_filters = as_tensor_variable(depthwise_filters)
+    # Depthwise stage: a grouped convolution that uses ``num_channels``
+    # groups, and receives all the user-supplied border/stride/dilation
+    # settings.
+    conv_op = AbstractConv2d(imshp=input_shape,
+                             kshp=depthwise_filter_shape,
+                             border_mode=border_mode,
+                             subsample=subsample,
+                             filter_flip=filter_flip,
+                             filter_dilation=filter_dilation,
+                             num_groups=num_channels)
+    # The shape of the intermediate result is only inferred when both static
+    # shapes were supplied; otherwise the pointwise stage gets no shape hint.
+    if input_shape is None or depthwise_filter_shape is None:
+        depthwise_op_shape = None
+    else:
+        depthwise_op_shape = conv_op.infer_shape(None, [input_shape, depthwise_filter_shape])[0]
+    depthwise_op = conv_op(input, depthwise_filters)
+    # Pointwise stage: an ungrouped convolution over the depthwise output with
+    # fixed 'valid' border, unit subsample and unit dilation. Only
+    # ``filter_flip`` is forwarded from the caller's settings.
+    pointwise_op = conv2d(input=depthwise_op,
+                          filters=pointwise_filters,
+                          input_shape=depthwise_op_shape,
+                          filter_shape=pointwise_filter_shape,
+                          border_mode='valid',
+                          subsample=(1, 1),
+                          filter_flip=filter_flip,
+                          filter_dilation=(1, 1),
+                          num_groups=1)
+    return pointwise_op
 def conv3d(input,
           filters,
           input_shape=None,

--- a/theano/tensor/nnet/tests/test_abstract_conv.py
+++ b/theano/tensor/nnet/tests/test_abstract_conv.py
@@ -23,6 +23,7 @@ from theano.tensor.nnet.abstract_conv import AbstractConv2d_gradWeights
 from theano.tensor.nnet.abstract_conv import bilinear_kernel_1D
 from theano.tensor.nnet.abstract_conv import bilinear_kernel_2D
 from theano.tensor.nnet.abstract_conv import bilinear_upsampling
+from theano.tensor.nnet.abstract_conv import separable_conv2d
 from theano.tensor.nnet.conv import ConvOp
 from theano.tensor.nnet.corr import (CorrMM, CorrMM_gradWeights,
                                     CorrMM_gradInputs)
@@ -1854,3 +1855,72 @@ class Grouped_conv_noOptim(unittest.TestCase):
            utt.verify_grad(conv_gradinputs,
                            [kern, top],
                            mode=self.mode, eps=1)
+class Separable_conv(unittest.TestCase):
+    """Checks ``separable_conv2d`` against precomputed reference outputs."""
+    def test_interface(self):
+        """Compile ``separable_conv2d`` with several option sets and compare
+        each result with hard-coded expected values."""
+        # Input fixture of shape (1, 2, 5, 5).
+        x = np.array([[[[1, 2, 3, 4, 5], [3, 2, 1, 4, 5], [3, 3, 1, 3, 6], [5, 3, 2, 1, 1], [4, 7, 1, 2, 1]],
+                       [[3, 3, 1, 2, 6], [6, 5, 4, 3, 1], [3, 4, 5, 2, 3], [6, 4, 1, 3, 4], [2, 3, 4, 2, 5]]]]).astype(theano.config.floatX)
+        # Depthwise filters of shape (4, 1, 3, 3).
+        depthwise_filter = np.array([[[[3, 2, 1], [5, 3, 2], [6, 4, 2]]], [[[5, 5, 2], [3, 7, 4], [3, 5, 4]]],
+                                     [[[7, 4, 7], [5, 3, 3], [1, 3, 1]]], [[[4, 4, 4], [2, 4, 6], [0, 0, 7]]]]).astype(theano.config.floatX)
+        # Pointwise filters of shape (2, 4, 1, 1).
+        pointwise_filter = np.array([[[[4]], [[1]], [[3]], [[5]]], [[[2]], [[1]], [[2]], [[8]]]]).astype(theano.config.floatX)
+        # Reference output of shape (1, 2, 3, 3) for the default settings.
+        precomp_output = np.array([[[[1385, 1333, 1339], [1382, 1243, 1291], [1303, 1120, 1228]],
+                                  [[1532, 1410, 1259], [1522, 1346, 1314], [1379, 1192, 1286]]]]).astype(theano.config.floatX)
+        x_sym = theano.tensor.tensor4('x')
+        dfilter_sym = theano.tensor.tensor4('d')
+        pfilter_sym = theano.tensor.tensor4('p')
+        # num_channels is taken from the input fixture's channel axis.
+        sep_op = separable_conv2d(x_sym, dfilter_sym, pfilter_sym, x.shape[1])
+        fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN')
+        # test for square matrix
+        top = fun(x, depthwise_filter, pointwise_filter)
+        utt.assert_allclose(top, precomp_output)
+        # test for non-square matrix
+        # (only the first 3 input rows are fed, so only the first row of the
+        # reference output is expected)
+        top = fun(x[:, :, :3, :], depthwise_filter, pointwise_filter)
+        utt.assert_allclose(top, precomp_output[:, :, :1, :])
+        # test if it infers shape
+        sep_op = separable_conv2d(x_sym,
+                                  dfilter_sym,
+                                  pfilter_sym,
+                                  x.shape[1],
+                                  input_shape=x.shape,
+                                  depthwise_filter_shape=depthwise_filter.shape,
+                                  pointwise_filter_shape=pointwise_filter.shape)
+        fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN')
+        top = fun(x, depthwise_filter, pointwise_filter)
+        utt.assert_allclose(top, precomp_output)
+        # test non-default subsample
+        sep_op = separable_conv2d(x_sym,
+                                  dfilter_sym,
+                                  pfilter_sym,
+                                  x.shape[1],
+                                  subsample=(2, 2))
+        fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN')
+        top = fun(x, depthwise_filter, pointwise_filter)
+        # Expected value: the reference output with index 1 removed along both
+        # spatial axes, i.e. only rows/columns 0 and 2 remain.
+        utt.assert_allclose(top, np.delete(np.delete(precomp_output, 1, axis=3), 1, axis=2))
+        # test non-default border_mode
+        # Reference output of shape (1, 2, 5, 5) for the 3x3 crop of x used below.
+        precomp_output = np.array([[[[140, 266, 343, 206, 59],
+                                     [395, 697, 979, 585, 245],
+                                     [429, 863, 1385, 919, 453],
+                                     [243, 499, 864, 627, 371],
+                                     [90, 183, 291, 254, 202]],
+                                    [[149, 289, 359, 213, 58],
+                                     [400, 750, 1076, 662, 266],
+                                     [387, 854, 1532, 1091, 540],
+                                     [174, 411, 971, 786, 518],
+                                     [51, 110, 286, 299, 298]]]]).astype(theano.config.floatX)
+        sep_op = separable_conv2d(x_sym, dfilter_sym, pfilter_sym, x.shape[1], border_mode='full')
+        fun = theano.function([x_sym, dfilter_sym, pfilter_sym], sep_op, mode='FAST_RUN')
+        top = fun(x[:, :, :3, :3], depthwise_filter, pointwise_filter)
+        utt.assert_allclose(top, precomp_output)