提交 ea62004b authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #6331 from vikramnitin9/dilated_causal

Dilated causal convolution
差异被折叠。
......@@ -3084,6 +3084,10 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
if op.unshared:
return None
if isinstance(op.border_mode, tuple) and any(isinstance(p, tuple) for p in op.border_mode):
# Asymmetric padding not yet supported
return None
inp1 = inputs[0]
inp2 = inputs[1]
......@@ -3180,6 +3184,9 @@ def local_abstractconv_cudnn(node):
return
if node.op.unshared:
return None
if isinstance(node.op.border_mode, tuple) and any(isinstance(p, tuple) for p in node.op.border_mode):
# Asymmetric padding not yet supported
return None
if isinstance(node.op, AbstractConv2d):
with inherit_stack_trace(node.outputs):
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
......@@ -3198,6 +3205,9 @@ def local_abstractconv_cudnn_alt(node):
return None
if node.op.unshared:
return None
if isinstance(node.op.border_mode, tuple) and any(isinstance(p, tuple) for p in node.op.border_mode):
# Asymmetric padding not yet supported
return None
inp1 = node.inputs[0]
inp2 = node.inputs[1]
......@@ -3407,6 +3417,9 @@ def local_abstractconv_gw_cudnn(node):
return
if node.op.unshared:
return None
if isinstance(node.op.border_mode, tuple) and any(isinstance(p, tuple) for p in node.op.border_mode):
# Asymmetric padding not yet supported
return None
if isinstance(node.op, AbstractConv2d_gradWeights):
with inherit_stack_trace(node.outputs):
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
......@@ -3422,6 +3435,9 @@ def local_abstractconv_gi_cudnn(node):
return
if node.op.unshared:
return None
if isinstance(node.op.border_mode, tuple) and any(isinstance(p, tuple) for p in node.op.border_mode):
# Asymmetric padding not yet supported
return None
if isinstance(node.op, AbstractConv2d_gradInputs):
with inherit_stack_trace(node.outputs):
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
......
......@@ -12,6 +12,7 @@ from ..type import gpuarray_shared_constructor
from ..blas import GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs
from .config import mode_with_gpu, mode_without_gpu, ref_cast
from theano.tensor.nnet.tests.test_abstract_conv import Grouped_conv_noOptim, TestUnsharedConv
from theano.tensor.nnet.tests.test_abstract_conv import TestAsymmetricPadding, TestCausalConv
class TestCorrMM(unittest.TestCase):
......@@ -272,3 +273,14 @@ class TestUnsharedGpuCorr2d(TestUnsharedConv):
conv2d_op = GpuCorrMM
conv2d_gradw_op = GpuCorrMM_gradWeights
conv2d_gradi_op = GpuCorrMM_gradInputs
class TestAsymmetricGpu(TestAsymmetricPadding):
    """Run the asymmetric-padding conv tests on GPU via the GpuCorrMM ops.

    Inherits all test methods from ``TestAsymmetricPadding``; only the
    compilation mode and the op classes asserted in the optimized graph
    are overridden.
    """
    mode = mode_with_gpu
    conv2d_op = GpuCorrMM
    conv2d_gradw_op = GpuCorrMM_gradWeights
    conv2d_gradi_op = GpuCorrMM_gradInputs
class TestCausalGpuCorr(TestCausalConv):
    """Run the causal-convolution tests from ``TestCausalConv`` in GPU mode."""
    mode = mode_with_gpu
......@@ -72,7 +72,7 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
You can give ``None`` for any element of the list to specify that this
element is not known at compile time.
border_mode: str, int or tuple of two int
border_mode: str, int or a tuple of two ints or pairs of ints
Either of the following:
``'valid'``: apply filter wherever it completely overlaps with the
......@@ -85,8 +85,11 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
leads to the output shape being equal to the input shape.
``int``: pad input with a symmetric border of zeros of the given
width, then perform a valid convolution.
``(int1, int2)``: pad input with a symmetric border of ``int1`` rows
and ``int2`` columns, then perform a valid convolution.
``(int1, int2)``: (for 2D) pad input with a symmetric border of ``int1``
rows and ``int2`` columns, then perform a valid convolution.
``(int1, (int2, int3))`` or ``((int1, int2), int3)``: (for 2D)
pad input with one symmetric border of ``int1`` or ``int3``, and
one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
subsample: tuple of len 2
Factor by which to subsample the output.
......
......@@ -31,23 +31,23 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
void im2col(const %(float_type)s* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w,
const int pad_hl, const int pad_hr, const int pad_wl, const int pad_wr,
const int stride_h, const int stride_w,
%(float_type)s* data_col) {
// Implicit dilated kernel size
int dil_kernel_h = (kernel_h - 1) * dilation_h + 1;
int dil_kernel_w = (kernel_w - 1) * dilation_w + 1;
int height_col = (height + 2 * pad_h - dil_kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - dil_kernel_w) / stride_w + 1;
int height_col = (height + pad_hl + pad_hr - dil_kernel_h) / stride_h + 1;
int width_col = (width + pad_wl + pad_wr - dil_kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w;
for (int c = 0; c < channels_col; ++c) {
int w_offset = c %% kernel_w;
int h_offset = (c / kernel_w) %% kernel_h;
int c_im = c / kernel_h / kernel_w;
for (int h = 0; h < height_col; ++h) {
int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
int h_pad = h * stride_h - pad_hl + h_offset * dilation_h;
for (int w = 0; w < width_col; ++w) {
int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
int w_pad = w * stride_w - pad_wl + w_offset * dilation_w;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_col[(npy_intp)(c * height_col + h) * width_col + w] =
data_im[(npy_intp)(c_im * height + h_pad) * width + w_pad];
......@@ -64,13 +64,14 @@ void im2col(const %(float_type)s* data_im, const int channels,
void col2im(const %(float_type)s* data_col, const int channels,
const int height, const int width, const int patch_h, const int patch_w,
const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, %(float_type)s* data_im) {
const int pad_hl, const int pad_hr, const int pad_wl, const int pad_wr,
const int stride_h, const int stride_w,
%(float_type)s* data_im) {
// Implicit dilated patch
int dil_patch_h = (patch_h - 1) * dilation_h + 1;
int dil_patch_w = (patch_w - 1) * dilation_w + 1;
int height_col = (height + 2 * pad_h - dil_patch_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1;
int height_col = (height + pad_hl + pad_hr - dil_patch_h) / stride_h + 1;
int width_col = (width + pad_wl + pad_wr - dil_patch_w) / stride_w + 1;
int num_kernels = channels * height * width;
int channels_col = channels * patch_h * patch_w;
for (int c = 0; c < channels_col; ++c) {
......@@ -78,9 +79,9 @@ void col2im(const %(float_type)s* data_col, const int channels,
int h_offset = (c / patch_w) %% patch_h;
int c_im = c / patch_h / patch_w;
for (int h = 0; h < height_col; ++h) {
int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
int h_pad = h * stride_h - pad_hl + h_offset * dilation_h;
for (int w = 0; w < width_col; ++w) {
int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
int w_pad = w * stride_w - pad_wl + w_offset * dilation_w;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_im[(npy_intp)(c_im * height + h_pad) * width + w_pad] +=
data_col[(npy_intp)(c * height_col + h) * width_col + w];
......@@ -105,8 +106,10 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const int dW = 1,
const int dilH = 1,
const int dilW = 1,
const int padH = 0,
const int padW = 0,
const int padH_l = 0,
const int padH_r = 0,
const int padW_l = 0,
const int padW_r = 0,
const int numgroups = 1,
const int unshared = 0)
{
......@@ -172,8 +175,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth)
const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const int topHeightNoDH = (bottomHeight + padH_l + padH_r - dil_kH);
const int topWidthNoDW = (bottomWidth + padW_l + padW_r - dil_kW);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
......@@ -303,7 +306,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
int tid = %(omp_get_thread_num)s;
// First, im2col
im2col((%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride, nChannels,
bottomHeight,bottomWidth, kH, kW, dilH, dilW, padH, padW, dH, dW,
bottomHeight,bottomWidth, kH, kW, dilH, dilW, padH_l, padH_r, padW_l, padW_r, dH, dW,
(%(float_type)s*)PyArray_DATA(col)+ tid * col_stride);
// Second, gemm
if (unshared) {
......@@ -396,7 +399,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
int tid = %(omp_get_thread_num)s;
// First, im2col
im2col((%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride,
nChannels, bottomHeight,bottomWidth, kH, kW, dilH, dilW, padH, padW, dH, dW,
nChannels, bottomHeight,bottomWidth, kH, kW, dilH, dilW, padH_l, padH_r, padW_l, padW_r, dH, dW,
(%(float_type)s*)PyArray_DATA(col)+ tid * col_stride);
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
......@@ -519,7 +522,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
}
// col2im back to the data
col2im((%(float_type)s*)PyArray_DATA(col) + tid * col_stride, nChannels, bottomHeight, bottomWidth,
kH, kW, dilH, dilW, padH, padW,
kH, kW, dilH, dilW, padH_l, padH_r, padW_l, padW_r,
dH, dW, (%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride);
}
// Restore to previous blas threads
......
差异被折叠。
......@@ -24,6 +24,7 @@ from theano.tensor.nnet.abstract_conv import bilinear_kernel_1D
from theano.tensor.nnet.abstract_conv import bilinear_kernel_2D
from theano.tensor.nnet.abstract_conv import bilinear_upsampling
from theano.tensor.nnet.abstract_conv import separable_conv2d, separable_conv3d
from theano.tensor.nnet.abstract_conv import causal_conv1d
from theano.tensor.nnet.corr import (CorrMM, CorrMM_gradWeights,
CorrMM_gradInputs)
from theano.tensor.nnet.corr3d import (Corr3dMM, Corr3dMM_gradWeights,
......@@ -1894,3 +1895,156 @@ class TestUnsharedConv(unittest.TestCase):
if verify:
utt.verify_grad(conv_gradinputs, [kern, top], mode=self.mode, eps=1)
class TestAsymmetricPadding(unittest.TestCase):
    """Check 2D convolution ops with asymmetric zero-padding border modes.

    Each test compiles the op under test with an asymmetric
    ``border_mode`` and compares its output against a plain ``"valid"``
    convolution applied to an explicitly zero-padded (forward /
    grad-weights) or cropped (grad-inputs) reference, then runs
    ``verify_grad``.  Backend test files subclass this and override
    ``mode`` and the ``*_op`` attributes to check the same numerics on
    their implementation (e.g. CorrMM, GpuCorrMM).
    """
    # Ops used to build the graphs under test.
    conv2d = theano.tensor.nnet.abstract_conv.AbstractConv2d
    conv2d_gradw = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights
    conv2d_gradi = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs
    # Op classes expected to appear in the compiled graph; subclasses
    # override these to assert the backend-specific op was selected.
    conv2d_op = theano.tensor.nnet.abstract_conv.AbstractConv2d
    conv2d_gradw_op = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights
    conv2d_gradi_op = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs
    mode = theano.compile.mode.Mode(optimizer='None')

    # Per-case shapes; the lists below are iterated in lockstep with zip().
    img_shape = [(2, 2, 4, 4), (3, 2, 4, 2), (3, 3, 5, 3)]
    kern_shape = [(4, 2, 2, 2), (2, 2, 4, 2), (2, 3, 3, 3)]
    topgrad_shape = [(2, 4, 6, 6), (3, 2, 3, 4), (3, 2, 6, 1)]
    # ((pad_h_top, pad_h_bottom), (pad_w_left, pad_w_right)) per case.
    border_mode = [((1, 2), (2, 1)), ((1, 1), (0, 3)), ((2, 1), (0, 0))]

    def test_fwd(self):
        """Forward conv with asymmetric padding equals a valid conv on a
        manually zero-padded image."""
        img_sym = theano.tensor.tensor4('img')
        kern_sym = theano.tensor.tensor4('kern')
        for imshp, kshp, pad in zip(self.img_shape, self.kern_shape, self.border_mode):
            img = np.random.random(imshp).astype(theano.config.floatX)
            kern = np.random.random(kshp).astype(theano.config.floatX)

            asymmetric_conv_op = self.conv2d(border_mode=pad, subsample=(1, 1),
                                             filter_dilation=(1, 1))
            asymmetric_out_sym = asymmetric_conv_op(img_sym, kern_sym)
            asymmetric_func = theano.function([img_sym, kern_sym], asymmetric_out_sym, mode=self.mode)
            # The compiled graph must actually contain the expected conv op.
            assert any([isinstance(node.op, self.conv2d_op)
                        for node in asymmetric_func.maker.fgraph.toposort()])
            asymmetric_output = asymmetric_func(img, kern)

            ref_conv_op = self.conv2d(border_mode="valid", subsample=(1, 1),
                                      filter_dilation=(1, 1))
            ref_out_sym = ref_conv_op(img_sym, kern_sym)
            ref_func = theano.function([img_sym, kern_sym], ref_out_sym, mode=self.mode)

            # Reference input: zero-pad img by the asymmetric amounts, then
            # run a plain valid convolution on it.
            exp_imshp = (imshp[0], imshp[1],
                         imshp[2] + pad[0][0] + pad[0][1],
                         imshp[3] + pad[1][0] + pad[1][1])
            exp_img = np.zeros(exp_imshp, dtype=theano.config.floatX)
            exp_img[:, :, pad[0][0]:imshp[2] + pad[0][0],
                    pad[1][0]:imshp[3] + pad[1][0]] = img
            ref_output = ref_func(exp_img, kern)

            utt.assert_allclose(asymmetric_output, ref_output)
            utt.verify_grad(asymmetric_conv_op, [img, kern], mode=self.mode, eps=1)

    def test_gradweight(self):
        """Weight gradient with asymmetric padding equals the valid-mode
        weight gradient computed on a manually zero-padded image."""
        img_sym = theano.tensor.tensor4('img')
        top_sym = theano.tensor.tensor4('top')
        for imshp, kshp, topshp, pad in zip(self.img_shape, self.kern_shape, self.topgrad_shape, self.border_mode):
            img = np.random.random(imshp).astype(theano.config.floatX)
            top = np.random.random(topshp).astype(theano.config.floatX)

            asymmetric_conv_op = self.conv2d_gradw(border_mode=pad, subsample=(1, 1),
                                                   filter_dilation=(1, 1))
            # Third argument is the expected filter spatial shape.
            asymmetric_out_sym = asymmetric_conv_op(img_sym, top_sym, kshp[-2:])
            asymmetric_func = theano.function([img_sym, top_sym], asymmetric_out_sym, mode=self.mode)
            assert any([isinstance(node.op, self.conv2d_gradw_op)
                        for node in asymmetric_func.maker.fgraph.toposort()])
            asymmetric_output = asymmetric_func(img, top)

            ref_conv_op = self.conv2d_gradw(border_mode="valid", subsample=(1, 1),
                                            filter_dilation=(1, 1))
            ref_out_sym = ref_conv_op(img_sym, top_sym, kshp[-2:])
            ref_func = theano.function([img_sym, top_sym], ref_out_sym, mode=self.mode)

            # Reference: same zero-padded image as in test_fwd.
            exp_imshp = (imshp[0], imshp[1],
                         imshp[2] + pad[0][0] + pad[0][1],
                         imshp[3] + pad[1][0] + pad[1][1])
            exp_img = np.zeros(exp_imshp, dtype=theano.config.floatX)
            exp_img[:, :, pad[0][0]:imshp[2] + pad[0][0],
                    pad[1][0]:imshp[3] + pad[1][0]] = img
            ref_output = ref_func(exp_img, top)

            utt.assert_allclose(asymmetric_output, ref_output)

            def conv_gradweight(inputs_val, output_val):
                # Wrap so verify_grad only differentiates the tensor inputs.
                return asymmetric_conv_op(inputs_val, output_val, tensor.as_tensor_variable(kshp[-2:]))

            utt.verify_grad(conv_gradweight, [img, top], mode=self.mode, eps=1)

    def test_gradinput(self):
        """Input gradient with asymmetric padding equals the valid-mode
        input gradient cropped back by the padding amounts."""
        kern_sym = theano.tensor.tensor4('kern')
        top_sym = theano.tensor.tensor4('top')
        for imshp, kshp, topshp, pad in zip(self.img_shape, self.kern_shape, self.topgrad_shape, self.border_mode):
            kern = np.random.random(kshp).astype(theano.config.floatX)
            top = np.random.random(topshp).astype(theano.config.floatX)

            asymmetric_conv_op = self.conv2d_gradi(border_mode=pad, subsample=(1, 1),
                                                   filter_dilation=(1, 1))
            # Third argument is the expected input spatial shape.
            asymmetric_out_sym = asymmetric_conv_op(kern_sym, top_sym, imshp[-2:])
            asymmetric_func = theano.function([kern_sym, top_sym], asymmetric_out_sym, mode=self.mode)
            assert any([isinstance(node.op, self.conv2d_gradi_op)
                        for node in asymmetric_func.maker.fgraph.toposort()])
            asymmetric_output = asymmetric_func(kern, top)

            ref_conv_op = self.conv2d_gradi(border_mode="valid", subsample=(1, 1),
                                            filter_dilation=(1, 1))
            # Reference computes the gradient w.r.t. the padded image, then
            # the result is cropped back to the unpadded region below.
            exp_imshp = [imshp[2] + pad[0][0] + pad[0][1],
                         imshp[3] + pad[1][0] + pad[1][1]]
            ref_out_sym = ref_conv_op(kern_sym, top_sym, exp_imshp)
            ref_func = theano.function([kern_sym, top_sym], ref_out_sym, mode=self.mode)
            ref_output = ref_func(kern, top)
            ref_output = ref_output[:, :, pad[0][0]:imshp[2] + pad[0][0],
                                    pad[1][0]:imshp[3] + pad[1][0]]

            utt.assert_allclose(asymmetric_output, ref_output)

            def conv_gradinputs(filters_val, output_val):
                # Wrap so verify_grad only differentiates the tensor inputs.
                return asymmetric_conv_op(filters_val, output_val, tensor.as_tensor_variable(imshp[-2:]))

            utt.verify_grad(conv_gradinputs, [kern, top], mode=self.mode, eps=1)
class TestCausalConv(unittest.TestCase):
    """Test ``causal_conv1d`` against a hand-precomputed reference output.

    A causal convolution pads only on the left, so each output step
    depends only on the current and earlier input steps; the output has
    the same temporal length as the input.  Backend test files subclass
    this and override ``mode``.
    """
    mode = theano.compile.mode.Mode(optimizer='None')

    # img: 3 batches x 2 channels x 5 time steps.
    img = np.array([[[2, 4, 9, 5, 8], [0, 0, 4, 0, 5]],
                    [[2, 5, 8, 5, 5], [1, 3, 0, 7, 9]],
                    [[7, 0, 7, 1, 0], [0, 1, 4, 7, 2]]]).astype(theano.config.floatX)
    # kern: 2 filters x 2 channels x filter width 3.
    kern = np.array([[[5, 3, 1], [3, 1, 0]],
                     [[6, 4, 9], [2, 2, 7]]]).astype(theano.config.floatX)
    dilation = 2
    # Expected output of causal_conv1d(img, kern, filter_dilation=2),
    # precomputed by hand; same shape as img along batch/time.
    precomp_top = np.array([[[10, 20, 63, 37, 88], [12, 24, 70, 46, 120]],
                            [[13, 34, 47, 64, 78], [14, 36, 58, 70, 105]],
                            [[35, 3, 68, 27, 38], [42, 2, 78, 22, 103]]]).astype(theano.config.floatX)

    def test_interface(self):
        """Forward output matches the precomputed reference, and the
        gradient passes verify_grad."""
        img_sym = theano.tensor.tensor3('img')
        kern_sym = theano.tensor.tensor3('kern')
        sym_out = causal_conv1d(img_sym, kern_sym, self.kern.shape, filter_dilation=self.dilation)

        causal_func = theano.function([img_sym, kern_sym], sym_out, mode=self.mode)

        output = causal_func(self.img, self.kern)

        utt.assert_allclose(output, self.precomp_top)

        def causal_conv_fn(inputs_val, filters_val):
            # NOTE(review): the gradient check uses filter_dilation=1 while
            # the forward check above uses self.dilation (=2) — presumably
            # intentional, but worth confirming.
            return causal_conv1d(inputs_val, filters_val, self.kern.shape, filter_dilation=1)

        utt.verify_grad(causal_conv_fn, [self.img, self.kern], mode=self.mode, eps=1)
......@@ -11,6 +11,7 @@ import theano.tensor as T
from theano.tests import unittest_tools as utt
from theano.tensor.nnet import corr, conv
from theano.tensor.nnet.tests.test_abstract_conv import Grouped_conv_noOptim, TestUnsharedConv
from theano.tensor.nnet.tests.test_abstract_conv import TestAsymmetricPadding, TestCausalConv
class TestCorr2D(utt.InferShapeTester):
......@@ -454,6 +455,23 @@ class TestUnsharedCorr2d(TestUnsharedConv):
conv2d_gradi_op = corr.CorrMM_gradInputs
class TestAsymmetricCorr(TestAsymmetricPadding):
    """Run the asymmetric-padding conv tests with the CPU CorrMM ops.

    Inherits all test methods from ``TestAsymmetricPadding``.
    """
    # Under FAST_COMPILE, switch to FAST_RUN without the GPU backend —
    # presumably so the graph is optimized down to the CorrMM ops; with
    # mode = None, theano.function uses the configured default mode.
    if theano.config.mode == "FAST_COMPILE":
        mode = theano.compile.get_mode("FAST_RUN").excluding('gpuarray')
    else:
        mode = None
    conv2d_op = corr.CorrMM
    conv2d_gradw_op = corr.CorrMM_gradWeights
    conv2d_gradi_op = corr.CorrMM_gradInputs
class TestCausalCorr(TestCausalConv):
    """Run the causal-convolution tests from ``TestCausalConv`` on CPU."""
    # Under FAST_COMPILE, switch to FAST_RUN without the GPU backend —
    # presumably so the conv optimizations are applied; with mode = None,
    # theano.function uses the configured default mode.
    if theano.config.mode == "FAST_COMPILE":
        mode = theano.compile.get_mode("FAST_RUN").excluding('gpuarray')
    else:
        mode = None
if __name__ == '__main__':
t = TestCorr2D('setUp')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论