Merge pull request #4850 from gvtulder/f-conv3d2d-half

Half 3D convolution for conv3d2d

Merge pull request #4850 from gvtulder/f-conv3d2d-half
853cf7e6 · Frédéric Bastien · GitHub · aecbbb99 · a87d8540 · 853cf7e6
--- a/theano/tensor/nnet/conv3d2d.py
+++ b/theano/tensor/nnet/conv3d2d.py
@@ -190,7 +190,7 @@ def conv3d(signals, filters,
    filters_shape
        None or a tuple/list with the shape of filters.
    border_mode
-        The only one tested is 'valid'.
+        One of 'valid', 'full' or 'half'.

    Notes
    -----
@@ -221,18 +221,11 @@ def conv3d(signals, filters,
    else:
        _filters_shape_5d = filters_shape

-    _signals_shape_4d = (
-        _signals_shape_5d[0] * _signals_shape_5d[1],
-        _signals_shape_5d[2],
-        _signals_shape_5d[3],
-        _signals_shape_5d[4],
-    )
-    _filters_shape_4d = (
-        _filters_shape_5d[0] * _filters_shape_5d[1],
-        _filters_shape_5d[2],
-        _filters_shape_5d[3],
-        _filters_shape_5d[4],
-    )
+    Ns, Ts, C, Hs, Ws = _signals_shape_5d
+    Nf, Tf, C, Hf, Wf = _filters_shape_5d
+
+    _signals_shape_4d = (Ns * Ts, C, Hs, Ws)
+    _filters_shape_4d = (Nf * Tf, C, Hf, Wf)

    if border_mode[1] != border_mode[2]:
        raise NotImplementedError('height and width bordermodes must match')
@@ -250,73 +243,54 @@ def conv3d(signals, filters,
        filter_shape=conv2d_filter_shape,
        border_mode=border_mode[1])  # ignoring border_mode[2]

-    # reshape the output to restore its original size
-    # shape = Ns, Ts, Nf, Tf, W-Wf+1, H-Hf+1
+    # compute the output height and width of the 2D convolution
    if border_mode[1] == 'valid':
-        out_tmp = out_4d.reshape((
-            _signals_shape_5d[0],  # Ns
-            _signals_shape_5d[1],  # Ts
-            _filters_shape_5d[0],  # Nf
-            _filters_shape_5d[1],  # Tf
-            _signals_shape_5d[3] - _filters_shape_5d[3] + 1,
-            _signals_shape_5d[4] - _filters_shape_5d[4] + 1,
-        ))
+        Hout = Hs - Hf + 1
+        Wout = Ws - Wf + 1
    elif border_mode[1] == 'full':
-        out_tmp = out_4d.reshape((
-            _signals_shape_5d[0],  # Ns
-            _signals_shape_5d[1],  # Ts
-            _filters_shape_5d[0],  # Nf
-            _filters_shape_5d[1],  # Tf
-            _signals_shape_5d[3] + _filters_shape_5d[3] - 1,
-            _signals_shape_5d[4] + _filters_shape_5d[4] - 1,
-        ))
+        Hout = Hs + Hf - 1
+        Wout = Ws + Wf - 1
+    elif border_mode[1] == 'half':
+        Hout = Hs - (Hf % 2) + 1
+        Wout = Ws - (Wf % 2) + 1
    elif border_mode[1] == 'same':
        raise NotImplementedError()
    else:
        raise ValueError('invalid border mode', border_mode[1])

+    # reshape the temporary output to restore its original size
+    out_tmp = out_4d.reshape((Ns, Ts, Nf, Tf, Hout, Wout))
+
    # now sum out along the Tf to get the output
    # but we have to sum on a diagonal through the Tf and Ts submatrix.
-    if border_mode[0] == 'valid':
-        if _filters_shape_5d[1] != 1:
+    if Tf == 1:
+        # for Tf==1, no sum along Tf, the Ts-axis of the output is unchanged!
+        out_5d = out_tmp.reshape((Ns, Ts, Nf, Hout, Wout))
+    else:
+        # temporal zero-padding before the diagonal sum, set by border_mode[0]
+        if border_mode[0] == 'valid':
+            Tpad = 0
+        elif border_mode[0] == 'full':
+            Tpad = Tf - 1
+        elif border_mode[0] == 'half':
+            Tpad = Tf // 2
+        elif border_mode[0] == 'same':
+            raise NotImplementedError()
+        else:
+            raise ValueError('invalid border mode', border_mode[0])
+
+        if Tpad == 0:
            out_5d = diagonal_subtensor(out_tmp, 1, 3).sum(axis=3)
-        else:  # for Tf==1, no sum along Tf, the Ts-axis of the output is unchanged!
-            out_5d = out_tmp.reshape((
-                _signals_shape_5d[0],
-                _signals_shape_5d[1],
-                _filters_shape_5d[0],
-                _signals_shape_5d[3] - _filters_shape_5d[3] + 1,
-                _signals_shape_5d[4] - _filters_shape_5d[4] + 1,
-            ))
-    elif border_mode[0] == 'full':
-        if _filters_shape_5d[1] != 1:
-            # pad out_tmp with zeros to have full convolution
+        else:
+            # pad out_tmp with zeros before summing over the diagonal
            out_tmp_padded = tensor.zeros(dtype=out_tmp.dtype, shape=(
-                _signals_shape_5d[0],  # Ns
-                _signals_shape_5d[1] + 2 * (_filters_shape_5d[1] - 1),  # Ts
-                _filters_shape_5d[0],  # Nf
-                _filters_shape_5d[1],  # Tf
-                _signals_shape_5d[3] + _filters_shape_5d[3] - 1,
-                _signals_shape_5d[4] + _filters_shape_5d[4] - 1,
+                Ns, Ts + 2 * Tpad, Nf, Tf, Hout, Wout
            ))
            out_tmp_padded = tensor.set_subtensor(
-                out_tmp_padded[:,
-                               (_filters_shape_5d[1] - 1):(_signals_shape_5d[1] + _filters_shape_5d[1] - 1),
-                               :, :, :, :],
+                out_tmp_padded[:, Tpad:(Ts + Tpad), :, :, :, :],
                out_tmp)
            out_5d = diagonal_subtensor(out_tmp_padded, 1, 3).sum(axis=3)
-        else:  # for tf==1, no sum along tf, the ts-axis of the output is unchanged!
-            out_5d = out_tmp.reshape((
-                _signals_shape_5d[0],
-                _signals_shape_5d[1],
-                _filters_shape_5d[0],
-                _signals_shape_5d[3] + _filters_shape_5d[3] - 1,
-                _signals_shape_5d[4] + _filters_shape_5d[4] - 1,
-            ))
-    elif border_mode[0] == 'same':
-        raise NotImplementedError('sequence border mode', border_mode[0])
-    else:
-        raise ValueError('invalid border mode', border_mode[1])
+
    return out_5d



--- a/theano/tensor/nnet/tests/test_conv3d2d.py
+++ b/theano/tensor/nnet/tests/test_conv3d2d.py
@@ -55,19 +55,32 @@ def test_get_diagonal_subtensor_view(wrap=lambda a: a):


 def pyconv3d(signals, filters, border_mode='valid'):
-    if border_mode == 'full':
-        # zero-pad signals for full convolution
-        Ns, Ts, C, Hs, Ws = signals.shape
-        Nf, Tf, C, Hf, Wf = filters.shape
-        signals_padded = numpy.zeros((Ns, Ts + 2 * (Tf - 1), C,
-                                      Hs + 2 * (Hf - 1), Ws + 2 * (Wf - 1)), 'float32')
-        signals_padded[:, (Tf - 1):(Ts + Tf - 1), :, (Hf - 1):(Hs + Hf - 1),
-                       (Wf - 1):(Ws + Wf - 1)] = signals
-        signals = signals_padded
-
    Ns, Ts, C, Hs, Ws = signals.shape
    Nf, Tf, C, Hf, Wf = filters.shape

+    # if border_mode is not 'valid', the signals need zero-padding
+    if border_mode == 'full':
+        Tpad = Tf - 1
+        Hpad = Hf - 1
+        Wpad = Wf - 1
+    elif border_mode == 'half':
+        Tpad = Tf // 2
+        Hpad = Hf // 2
+        Wpad = Wf // 2
+    else:
+        Tpad = 0
+        Hpad = 0
+        Wpad = 0
+
+    if Tpad > 0 or Hpad > 0 or Wpad > 0:
+        # zero-pad signals
+        signals_padded = numpy.zeros((Ns, Ts + 2 * Tpad, C,
+                                      Hs + 2 * Hpad, Ws + 2 * Wpad), 'float32')
+        signals_padded[:, Tpad:(Ts + Tpad), :, Hpad:(Hs + Hpad),
+                       Wpad:(Ws + Wpad)] = signals
+        Ns, Ts, C, Hs, Ws = signals_padded.shape
+        signals = signals_padded
+
    Tf2 = Tf // 2
    Hf2 = Hf // 2
    Wf2 = Wf // 2
@@ -91,7 +104,7 @@ def check_diagonal_subtensor_view_traces(fn):
        fn, ops_to_check=(DiagonalSubtensor, IncDiagonalSubtensor))


-@parameterized.expand(('valid', 'full'), utt.custom_name_func)
+@parameterized.expand(('valid', 'full', 'half'), utt.custom_name_func)
 def test_conv3d(border_mode):
    check_conv3d(border_mode=border_mode,
                 mode=mode_without_gpu,