Documentation suggestions implemented

5ae986b1 · Vikram · 444f7d56 · 5ae986b1 · 5ae986b1 · 5ae986b1
--- a/theano/gpuarray/dnn.py
+++ b/theano/gpuarray/dnn.py
@@ -3039,6 +3039,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
        return None
    if isinstance(op.border_mode, tuple) and any(isinstance(p, tuple) for p in op.border_mode):
+        # Asymmetric padding not yet supported
        return None
    inp1 = inputs[0]
@@ -3138,6 +3139,7 @@ def local_abstractconv_cudnn(node):
    if node.op.unshared:
        return None
    if isinstance(node.op.border_mode, tuple) and any(isinstance(p, tuple) for p in node.op.border_mode):
+        # Asymmetric padding not yet supported
        return None
    if isinstance(node.op, AbstractConv2d):
        return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
@@ -3156,6 +3158,7 @@ def local_abstractconv_cudnn_alt(node):
    if node.op.unshared:
        return None
    if isinstance(node.op.border_mode, tuple) and any(isinstance(p, tuple) for p in node.op.border_mode):
+        # Asymmetric padding not yet supported
        return None
    inp1 = node.inputs[0]
    inp2 = node.inputs[1]
@@ -3366,6 +3369,7 @@ def local_abstractconv_gw_cudnn(node):
    if node.op.unshared:
        return None
    if isinstance(node.op.border_mode, tuple) and any(isinstance(p, tuple) for p in node.op.border_mode):
+        # Asymmetric padding not yet supported
        return None
    if isinstance(node.op, AbstractConv2d_gradWeights):
        return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
@@ -3381,6 +3385,7 @@ def local_abstractconv_gi_cudnn(node):
    if node.op.unshared:
        return None
    if isinstance(node.op.border_mode, tuple) and any(isinstance(p, tuple) for p in node.op.border_mode):
+        # Asymmetric padding not yet supported
        return None
    if isinstance(node.op, AbstractConv2d_gradInputs):
        return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)

--- a/theano/tensor/nnet/__init__.py
+++ b/theano/tensor/nnet/__init__.py
@@ -72,18 +72,17 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
        You can give ``None`` for any element of the list to specify that this
        element is not known at compile time.
-     border_mode: str, int or tuple of ``convdim`` elements where each element
+    border_mode: str, int or a tuple of two ints or pairs of ints
-        is an integer or a tuple of length 2.
        Either of the following:
        ``'valid'``: apply filter wherever it completely overlaps with the
            input. Generates output of shape: input shape - filter shape + 1
        ``'full'``: apply filter wherever it partly overlaps with the input.
            Generates output of shape: input shape + filter shape - 1
-        ``'half'``: pad input with a symmetric border of ``filter size // 2``
+        ``'half'``: pad input with a symmetric border of ``filter rows // 2``
-            in each convolution dimension, then perform a valid convolution.
+            rows and ``filter columns // 2`` columns, then perform a valid
-            For filters with an odd filter size, this leads to the output
+            convolution. For filters with an odd number of rows and columns, this
-            shape being equal to the input shape.
+            leads to the output shape being equal to the input shape.
        ``int``: pad input with a symmetric border of zeros of the given
            width, then perform a valid convolution.
        ``(int1, int2)``: (for 2D) pad input with a symmetric border of ``int1``,
@@ -91,11 +90,6 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
        ``(int1, (int2, int3))`` or ``((int1, int2), int3)``: (for 2D)
            pad input with one symmetric border of `int1`` or ``int3``, and
            one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
-        ``((int1, int2), (int3, int4))``: (for 2D) pad input with an asymmetric
-            border of ``(int1, int2)`` along one dimension and ``(int3, int4)``
-            along the second dimension.
-        ``(int1, int2, int3)``: (for 3D) pad input with a symmetric border of
-            ``int1``, ``int2`` and ``int3``, then perform a valid convolution.
    subsample: tuple of len 2
        Factor by which to subsample the output.
@@ -208,7 +202,7 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
        You can give ``None`` for any element of the list to specify that this
        element is not known at compile time.
-    border_mode: str, int or tuple of two elements
+    border_mode: str, int or tuple of two ints
        Refers to the ``border_mode`` argument of the corresponding forward
        (non-transposed) convolution. See the argument description in
        ``conv2d``.  What was ``padding`` for the forward convolution means

--- a/theano/tensor/nnet/abstract_conv.py
+++ b/theano/tensor/nnet/abstract_conv.py
@@ -52,11 +52,11 @@ def get_conv_output_shape(image_shape, kernel_shape,
        number of output channels, height and width of the output, number of
        input channels, height and width of the kernel.
        None where undefined.
-    border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
+    border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
-        or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
+        or numeric) or pairs of ints. If it is a string, it must be 'valid',
-        If it is a tuple, its two (or three) elements respectively correspond
+        'half' or 'full'. If it is a tuple, its two (or three) elements respectively
-        to the padding (possibly left and right) on height and width
+        correspond to the padding on height and width (and possibly depth)
-        (and possibly depth) axis.
+        axis. For asymmetric padding, provide a pair of ints for each dimension.
    subsample: tuple of int (symbolic or numeric). Its two or three elements
        espectively correspond to the subsampling on height and width (and
        possibly depth) axis.
@@ -104,10 +104,11 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
        given axis. None if undefined.
    kernel_shape: int or None. Corresponds to the kernel shape on a given
        axis. None if undefined.
-    border_mode: string, int or tuple. If it is a string, it must be
+    border_mode: string, int or tuple of 2 ints. If it is a string, it must be
        'valid', 'half' or 'full'. If it is an integer, it must correspond to
        the padding on the considered axis. If it is a tuple, its two elements
-        must correspond to the padding (left and right) on the desired axis.
+        must correspond to the asymmetric padding (e.g., left and right) on
+        the considered axis.
    subsample: int. It must correspond to the subsampling on the
        considered axis.
    dilation: int. It must correspond to the dilation on the
@@ -173,11 +174,11 @@ def get_conv_gradweights_shape(image_shape, top_shape,
        image shape. Its four (or five) element must correspond respectively
        to: batch size, number of output channels, height and width (and
        possibly depth) of the image. None where undefined.
-    border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
+    border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
-        or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
+        or numeric) or pairs of ints. If it is a string, it must be 'valid',
-        If it is a tuple, its two (or three) elements respectively correspond
+        'half' or 'full'. If it is a tuple, its two (or three) elements respectively
-        to the padding (possibly left and right) on height and width
+        correspond to the padding on height and width (and possibly depth)
-        (and possibly depth) axis.
+        axis. For asymmetric padding, provide a pair of ints for each dimension.
    subsample: tuple of int (symbolic or numeric). Its two or three elements
        respectively correspond to the subsampling on height and width (and
        possibly depth) axis.
@@ -234,10 +235,11 @@ def get_conv_gradweights_shape_1axis(image_shape, top_shape, border_mode,
        given axis. None if undefined.
    top_shape: int or None. Corresponds to the top shape on a given axis.
        None if undefined.
-    border_mode: string, int or tuple. If it is a string, it must be
+    border_mode: string, int or tuple of 2 ints. If it is a string, it must be
        'valid', 'half' or 'full'. If it is an integer, it must correspond to
        the padding on the considered axis. If it is a tuple, its two elements
-        must correspond to the padding (left and right) on the desired axis.
+        must correspond to the asymmetric padding (e.g., left and right) on
+        the considered axis.
    subsample: int. It must correspond to the subsampling on the
        considered axis.
    dilation: int. It must correspond to the dilation on the
@@ -296,11 +298,11 @@ def get_conv_gradinputs_shape(kernel_shape, top_shape,
        image shape. Its four (or five) element must correspond respectively
        to: batch size, number of output channels, height and width (and
        possibly depth) of the image. None where undefined.
-    border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
+    border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
-        or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
+        or numeric) or pairs of ints. If it is a string, it must be 'valid',
-        If it is a tuple, its two (or three) elements respectively correspond
+        'half' or 'full'. If it is a tuple, its two (or three) elements respectively
-        to the padding (possibly left and right) on height and width
+        correspond to the padding on height and width (and possibly depth)
-        (and possibly depth) axis.
+        axis. For asymmetric padding, provide a pair of ints for each dimension.
    subsample: tuple of int (symbolic or numeric). Its two or three elements
        respectively correspond to the subsampling on height and width (and
        possibly depth) axis.
@@ -354,10 +356,11 @@ def get_conv_gradinputs_shape_1axis(kernel_shape, top_shape, border_mode,
        axis. None if undefined.
    top_shape: int or None. Corresponds to the top shape on a given axis.
        None if undefined.
-    border_mode: string, int or tuple. If it is a string, it must be
+    border_mode: string, int or tuple of 2 ints. If it is a string, it must be
        'valid', 'half' or 'full'. If it is an integer, it must correspond to
        the padding on the considered axis. If it is a tuple, its two elements
-        must correspond to the padding (left and right) on the desired axis.
+        must correspond to the asymmetric padding (e.g., left and right) on
+        the considered axis.
    subsample: int. It must correspond to the subsampling on the
        considered axis.
    dilation: int. It must correspond to the dilation on the
@@ -423,11 +426,11 @@ def check_conv_gradinputs_shape(image_shape, kernel_shape, output_shape,
        output shape. Its four (or five) elements must correspond respectively
        to: batch size, number of output channels, height and width
        (and possibly depth) of the output. None where undefined.
-    border_mode: string, int (symbolic or numeric) or tuple where each element
+    border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
-        is either an int or a tuple of length 2 (symbolic or numeric).
+        or numeric) or pairs of ints. If it is a string, it must be 'valid',
-        If it is a string, it must be 'valid', 'half' or 'full'.
+        'half' or 'full'. If it is a tuple, its two (or three) elements respectively
-        If it is a tuple, its two (or three) elements respectively correspond
+        correspond to the padding on height and width (and possibly depth)
-        to the padding on height and width (and possibly depth) axis.
+        axis. For asymmetric padding, provide a pair of ints for each dimension.
    subsample: tuple of int (symbolic or numeric). Its two or three elements
        respectively correspond to the subsampling on height and width (and
        possibly depth) axis.
@@ -553,8 +556,9 @@ def assert_shape(x, expected_shape, msg='Unexpected shape.'):
        return x
-def mode_to_pad(mode, convdim, kshp):
+def border_mode_to_pad(mode, convdim, kshp):
-    """ Computes a tuple for padding given the border_mode parameter
+    """
+    Computes a tuple for padding given the border_mode parameter
    Parameters
    ----------
@@ -708,10 +712,10 @@ def separable_conv2d(input,
            width, then perform a valid convolution.
        ``(int1, int2)``: pad input with a symmetric border of ``int1`` rows
            and ``int2`` columns, then perform a valid convolution.
-        ``(int1, (int2, int3))`` or ``((int1, int2), int3)``: (for 2D)
+        ``(int1, (int2, int3))`` or ``((int1, int2), int3)``:
            pad input with one symmetric border of `int1`` or ``int3``, and
            one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
-        ``((int1, int2), (int3, int4))``: (for 2D) pad input with an asymmetric
+        ``((int1, int2), (int3, int4))``: pad input with an asymmetric
            border of ``(int1, int2)`` along one dimension and ``(int3, int4)``
            along the second dimension.
@@ -1041,8 +1045,7 @@ def conv2d_grad_wrt_inputs(output_grad,
        Optional, possibly used  to choose an optimal implementation.
        You can give ``None`` for any element of the list to specify that
        this element is not known at compile time.
-     border_mode: str, int or tuple of 2 elements where each element
+    border_mode: str, int or a tuple of two ints or pairs of ints
-        is an integer or a tuple of length 2.
        Either of the following:
          ``'valid'``
@@ -1073,8 +1076,8 @@ def conv2d_grad_wrt_inputs(output_grad,
            pad input with one symmetric border of `int1`` or ``int3``, and
            one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
-          ``((int1, int2), (int3, int4))``: (for 2D) pad input with an asymmetric
+          ``((int1, int2), (int3, int4))``
-            border of ``(int1, int2)`` along one dimension and ``(int3, int4)``
+            pad input with an asymmetric border of ``(int1, int2)`` along one dimension and ``(int3, int4)``
            along the second dimension.
    subsample : tuple of len 2
@@ -1336,8 +1339,7 @@ def conv2d_grad_wrt_weights(input,
        Optional, possibly used to choose an optimal implementation.
        You can give ``None`` for any element of the list to specify
        that this element is not known at compile time.
-     border_mode: str, int or tuple of 2 elements where each element
+    border_mode: str, int or a tuple of two ints or pairs of ints
-        is an integer or a tuple of length 2.
        Either of the following:
          ``'valid'``
@@ -1368,9 +1370,9 @@ def conv2d_grad_wrt_weights(input,
            pad input with one symmetric border of `int1`` or ``int3``, and
            one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
-          ``((int1, int2), (int3, int4))``: (for 2D) pad input with an asymmetric
+          ``((int1, int2), (int3, int4))``
-            border of ``(int1, int2)`` along one dimension and ``(int3, int4)``
+            pad input with an asymmetric border of ``(int1, int2)`` along
-            along the second dimension.
+            one dimension and ``(int3, int4)`` along the second dimension.
    subsample : tuple of len 2
        The subsampling used in the forward pass of the convolutional
        operation.  Also called strides elsewhere.
@@ -1584,16 +1586,17 @@ def conv3d_grad_wrt_weights(input,
    return gradWeight_op(input, output_grad, filter_shape[-3:])
-def causal_conv(input,
+def causal_conv1d(input,
-                filters,
+                  filters,
-                filter_shape,
+                  filter_shape,
-                input_shape=None,
+                  input_shape=None,
-                subsample=1,
+                  subsample=1,
-                filter_flip=True,
+                  filter_flip=True,
-                filter_dilation=1,
+                  filter_dilation=1,
-                num_groups=1,
+                  num_groups=1,
-                unshared=False):
+                  unshared=False):
-    """Computes (dilated) causal convolution
+    """
+    Computes (dilated) causal convolution
    The output at time t depends only on the inputs till t-1. Used for
    modelling temporal data.
@@ -1629,7 +1632,7 @@ def causal_conv(input,
    num_groups : int
        Divides the image, kernel and output tensors into num_groups
        separate groups. Each which carry out convolutions separately
-    unshared: bool
+    unshared : bool
        If true, then unshared or 'locally connected' convolution will be
        performed. A different filter will be used for each region of the
        input.
@@ -1640,6 +1643,11 @@ def causal_conv(input,
        Set of feature vectors generated by convolutional layer. Tensor is
        of shape (batch_size, output_channels, output_length)
+    Notes
+    -----
+    Currently, this is implemented with the 2D convolution ops.
    """
    input = as_tensor_variable(input)
@@ -1885,8 +1893,7 @@ class BaseAbstractConv(Op):
        element is not known at compile time.
        kshp is defined w.r.t the forward conv.
-     border_mode: str, int or tuple of ``convdim`` elements where each element
+    border_mode: str, int or a tuple of two ints or pairs of ints
-        is an integer or a tuple of length 2.
        Either of the following:
        ``'valid'``: apply filter wherever it completely overlaps with the
@@ -1965,12 +1972,15 @@ class BaseAbstractConv(Op):
                    'invalid border_mode {}, which must be a '
                    'tuple of length {}'.format(border_mode, convdim))
            for mode in border_mode:
+                if isinstance(mode, tuple) and convdim != 2:
+                    raise NotImplementedError(
+                        'Asymmetric padding not implemented for {}D'.format(convdim))
                if not((isinstance(mode, integer_types) and mode >= 0) or
                        (isinstance(mode, tuple) and len(mode) == 2 and min(mode) >= 0 and
                         all(isinstance(m, integer_types) for m in mode))):
                    raise ValueError(
                        'invalid border mode {}. The tuple can only contain integers '
-                        ' or tuples of integers of length 2'.format(border_mode))
+                        ' or pairs of integers'.format(border_mode))
        elif border_mode not in ('valid', 'full', 'half'):
            raise ValueError(
                'invalid border_mode {}, which must be either '
@@ -2238,7 +2248,7 @@ class AbstractConv(BaseAbstractConv):
                                      % self.convdim)
        o, = out_
        mode = self.border_mode
-        pad = mode_to_pad(mode, self.convdim, dil_kernshp)
+        pad = border_mode_to_pad(mode, self.convdim, dil_kernshp)
        if any(p != (0, 0) for p in pad):
            mode = "valid"
@@ -2503,7 +2513,7 @@ class AbstractConv_gradWeights(BaseAbstractConv):
        dil_shape = tuple((shape[i] - 1) * self.filter_dilation[i] + 1
                          for i in range(self.convdim))
-        pad = mode_to_pad(self.border_mode, self.convdim, dil_shape)
+        pad = border_mode_to_pad(self.border_mode, self.convdim, dil_shape)
        if any(p != (0, 0) for p in pad):
            new_img = np.zeros((img.shape[0], img.shape[1]) +
@@ -2805,8 +2815,7 @@ class AbstractConv_gradInputs(BaseAbstractConv):
        dil_kernshp = tuple((kern.shape[-self.convdim + i] - 1) * self.filter_dilation[i] + 1
                            for i in range(self.convdim))
-        mode = self.border_mode
+        pad = border_mode_to_pad(self.border_mode, self.convdim, dil_kernshp)
-        pad = mode_to_pad(mode, self.convdim, dil_kernshp)
        imshp = self.imshp[:] if self.imshp is not None else [None] * (2 + self.convdim)
        fallback_imshp = ([topgrad.shape[0], kern.shape[-self.convdim - 1]] +
@@ -2815,7 +2824,7 @@ class AbstractConv_gradInputs(BaseAbstractConv):
                 for i in range(2 + self.convdim)]
        expected_topgrad_shape = get_conv_output_shape(
            imshp, kern.shape,
-            mode, self.subsample, self.filter_dilation)
+            self.border_mode, self.subsample, self.filter_dilation)
        if not tuple(expected_topgrad_shape) == tuple(topgrad.shape):
            raise ValueError(
                'invalid input_shape for gradInputs: the given input_shape '

--- a/theano/tensor/nnet/corr.py
+++ b/theano/tensor/nnet/corr.py
@@ -89,7 +89,7 @@ class BaseCorrMM(gof.OpenMPOp):
            raise ValueError(
                'invalid border_mode {}, which must be either '
                '"valid", "full", "half", an integer or a tuple '
-                'of length 2'.format(border_mode))
+                'of two integers or a pair of integers'.format(border_mode))
        self.border_mode = border_mode
        if len(subsample) != 2:
            raise ValueError("subsample must have two elements")

--- a/theano/tensor/nnet/tests/test_abstract_conv.py
+++ b/theano/tensor/nnet/tests/test_abstract_conv.py
@@ -24,7 +24,7 @@ from theano.tensor.nnet.abstract_conv import bilinear_kernel_1D
 from theano.tensor.nnet.abstract_conv import bilinear_kernel_2D
 from theano.tensor.nnet.abstract_conv import bilinear_upsampling
 from theano.tensor.nnet.abstract_conv import separable_conv2d, separable_conv3d
-from theano.tensor.nnet.abstract_conv import causal_conv
+from theano.tensor.nnet.abstract_conv import causal_conv1d
 from theano.tensor.nnet.corr import (CorrMM, CorrMM_gradWeights,
                                     CorrMM_gradInputs)
 from theano.tensor.nnet.corr3d import (Corr3dMM, Corr3dMM_gradWeights,
@@ -2037,7 +2037,7 @@ class TestCausalConv(unittest.TestCase):
        img_sym = theano.tensor.tensor3('img')
        kern_sym = theano.tensor.tensor3('kern')
-        sym_out = causal_conv(img_sym, kern_sym, self.kern.shape, filter_dilation=self.dilation)
+        sym_out = causal_conv1d(img_sym, kern_sym, self.kern.shape, filter_dilation=self.dilation)
        causal_func = theano.function([img_sym, kern_sym], sym_out, mode=self.mode)
@@ -2046,6 +2046,6 @@ class TestCausalConv(unittest.TestCase):
        utt.assert_allclose(output, self.precomp_top)
        def causal_conv_fn(inputs_val, filters_val):
-            return causal_conv(inputs_val, filters_val, self.kern.shape, filter_dilation=1)
+            return causal_conv1d(inputs_val, filters_val, self.kern.shape, filter_dilation=1)
        utt.verify_grad(causal_conv_fn, [self.img, self.kern], mode=self.mode, eps=1)