numpydoc for theano/tensor/basic.py

47bf7427 · Iban Harlouchet · 0a7415d7 · 47bf7427
--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
 """A `Type` and `Op` classes to work with numpy.ndarrays symbolically."""
 import sys
 import warnings
@@ -63,8 +62,10 @@ class ShapeError(Exception):
 def check_equal_numpy(x, y):
    """
-    Returns True iff x and y are equal (checks the dtype and
+    Return True iff x and y are equal.
-    shape if x and y are numpy.ndarray instances).
+    Checks the dtype and shape if x and y are numpy.ndarray instances.
    """
    if isinstance(x, numpy.ndarray) and isinstance(y, numpy.ndarray):
        return (x.dtype == y.dtype and x.shape == y.shape and
@@ -89,6 +90,7 @@ def constructor(f):
    Make `f` appear as a constructor in the oplist (`gen_oplist`,
    doc/oplist.txt).
    """
    __oplist_constructor_list.append(f)
    return f
@@ -107,8 +109,7 @@ if 0:
    # - JB 20100226
    def as_cuda_or_tensor_variable(x, name=None, ndim=None):
        """
-        This function do the same as_tensor_variable, but don't
+        Like as_tensor_variable, but do not transfer the value to the gpu.
-        transfert the value on the gpu
        """
        if hasattr(x, '_as_CudaNdarrayVariable'):
            # TODO: pass name and ndim arguments
@@ -117,29 +118,31 @@ if 0:
 def as_tensor_variable(x, name=None, ndim=None):
-    """Return `x`, transformed into a `TensorType`
+    """Return `x`, transformed into a `TensorType`.
-    This function is often used by `make_node` methods of `Op`
+    This function is often used by `make_node` methods of `Op` subclasses
-    subclasses to turn ndarrays, numbers, `Scalar` instances, `Apply`
+    to turn ndarrays, numbers, `Scalar` instances, `Apply` instances and
-    instances and `TensorType` instances into valid input list
+    `TensorType` instances into valid input list elements.
-    elements.
+    Parameters
-    :Parameters:
+    ----------
-     - `x`: Apply instance, Variable instance, numpy.ndarray, or number
+    x : Apply instance, Variable instance, numpy.ndarray, or number
-       This thing will be transformed into a `Variable` in a sensible way.  An
+        This thing will be transformed into a `Variable` in a sensible way. An
-       ndarray argument will not be copied, but a list of numbers will be
+        ndarray argument will not be copied, but a list of numbers will be
-       copied to make an ndarray.
+        copied to make an ndarray.
-     - `name`: str or None
+    name : str or None
-       If a new `Variable` instance is created, it will be named with this
+        If a new `Variable` instance is created, it will be named with this
-       string.
+        string.
-     - `ndim`: None or integer
+    ndim : None or integer
-       Return a Variable with this many dimensions.  Raise TypeError if it's
+        Return a Variable with this many dimensions. Raise TypeError if it's
-       not possible.
+        not possible.
-    :Exceptions:
+    Raises
-     - `ValueError`: raised if an `Apply` with more then one output is fetched
+    ------
-     - `AsTensorError`: raised if `x` cannot be converted to a TensorType
+    ValueError
-       Variable
+        If an `Apply` with more than one output is fetched.
+    AsTensorError
+        If `x` cannot be converted to a TensorType Variable.
    """
    if hasattr(x, '_as_TensorVariable'):
@@ -231,16 +234,18 @@ class NumpyAutocaster(object):
              float32);
            - if no data type can represent `x` without loss of precision, then
              the last data type in the tuple will be used.
+    Parameters
+    ----------
+    dtypes : tuple of strings
+        The ordered list of preferred data types (only used when
+        `config.cast_policy` is set to 'custom', see the `NumpyAutocaster`
+        help for details).
    """
-    def __init__(self, dtypes):
-        """
-        Constructor.
-        :type dtypes: Tuple of strings.
+    def __init__(self, dtypes):
-        :param dtypes: The ordered list of preferred data types (only used when
-        `config.cast_policy` is set to 'custom', see the `NumpyAutocaster` help
-        for details).
-        """
        self.dtypes = tuple(dtypes)
    def __call__(self, x):
@@ -312,17 +317,20 @@ autocast_float = NumpyAutocaster(('float16', 'float32', 'float64'))
 #
 class autocast_float_as(object):
    """
+    Temporarily adjust autocasting behavior.
    This class makes it possible to temporarily and locally adjust autocasting
    behavior when `config.cast_policy` is set to 'custom'.
    If `config.cast_policy` is not 'custom', an exception is raised.
+    This class might be convenient in some code, but it definitely
+    helps to test the autocasting mechanism.
-    For example:
+    Examples
+    --------
    >>> with autocast_float_as('float32'):
    ...    assert (fvector() + 1.1).dtype == 'float32'  # temporary downcasting
    >>> assert (fvector() + 1.1).dtype == 'float64' # back to default behaviour
-    This class might be convenient in some code, but it definitely
-    helps to test the autocasting mechanism.
    """
    def __init__(self, *dtypes):
        self.dtypes = dtypes
@@ -339,11 +347,14 @@ class autocast_float_as(object):
 def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):
-    """Return a symbolic `Constant` with value `x`
+    """Return a symbolic `Constant` with value `x`.
-    :Exceptions:
+    Raises
-     - `TypeError`: `x` could not be converted to a numpy.ndarray
+    ------
-     - `ValueError`: `x` could not be expanded to have ndim dimensions
+    TypeError
+        `x` could not be converted to a numpy.ndarray.
+    ValueError
+        `x` could not be expanded to have ndim dimensions.
    """
    if dtype is not None:
@@ -507,8 +518,13 @@ class EmptyConstantError(NotScalarConstantError):
 def numpy_scalar(data):
-    """ Return a scalar stored in a numpy ndarray, or raise
+    """Return a scalar stored in a numpy ndarray.
-    NotScalarConstantError if the numpy ndarray is not a scalar
+    Raises
+    ------
+    NotScalarConstantError
+        If the numpy ndarray is not a scalar.
    """
    # handle case where data is numpy.array([])
@@ -536,26 +552,29 @@ get_scalar_constant_value_elemwises = (
 def get_scalar_constant_value(orig_v, elemwise=True,
                              only_process_constants=False):
-    """return the constant scalar(0-D) value underlying variable `v`
+    """Return the constant scalar (0-D) value underlying variable `v`.
-    If v is the output of dimshuffles, fills, allocs, rebroadcasts,
+    If `v` is the output of dimshuffles, fills, allocs, rebroadcasts,
-    cast, OutputGuard, DeepCopyOp, ScalarFromTensor, ScalarOp,
+    cast, OutputGuard, DeepCopyOp, ScalarFromTensor, ScalarOp, Elemwise
-    Elemwise and some pattern with Subtensor,
+    and some pattern with Subtensor, this function digs through them.
-    this function digs through them.
    If `v` is not some view of constant scalar data, then raise a
    NotScalarConstantError.
-    :param elemwise: If False, we won't try to go into elemwise.
+    Parameters
-        So this call is faster.
+    ----------
+    elemwise : bool
+        If False, we won't try to go into elemwise, so this call is faster.
+    only_process_constants : bool
+        If True, we only attempt to obtain the value of `orig_v` if it's
+        directly constant and don't try to dig through dimshuffles, fills,
+        allocs, and others to figure out its value.
-    :param only_process_constants: If True, we only attempt to obtain
+    Notes
-            the value of `orig_v` if it's directly constant and don't
+    -----
-            try to dig through dimshuffles, fills, allocs, and other to figure
+    There may be another function similar to this one in the code,
-            out its value.
+    but I'm not sure where it is.
-    :note: There may be another function similar to this one in the
-        code, but I'm not sure where it is.
    """
    v = orig_v
    while True:
@@ -773,8 +792,14 @@ lscalar = TensorType('int64', ())
 def scalar(name=None, dtype=None):
    """Return a symbolic scalar variable.
-    :param dtype: numeric type (None means to use theano.config.floatX)
-    :param name: a name to attach to this variable
+    Parameters
+    ----------
+    dtype : numeric type
+        None means to use theano.config.floatX.
+    name
+        A name to attach to this variable.
    """
    if dtype is None:
        dtype = config.floatX
@@ -803,8 +828,14 @@ lvector = TensorType('int64', (False, ))
 def vector(name=None, dtype=None):
    """Return a symbolic vector variable.
-    :param dtype: numeric type (None means to use theano.config.floatX)
-    :param name: a name to attach to this variable
+    Parameters
+    ----------
+    dtype : numeric type
+        None means to use theano.config.floatX.
+    name
+        A name to attach to this variable.
    """
    if dtype is None:
        dtype = config.floatX
@@ -830,8 +861,14 @@ lmatrix = TensorType('int64', (False, False))
 def matrix(name=None, dtype=None):
    """Return a symbolic matrix variable.
-    :param dtype: numeric type (None means to use theano.config.floatX)
-    :param name: a name to attach to this variable
+    Parameters
+    ----------
+    dtype : numeric type
+        None means to use theano.config.floatX.
+    name
+        A name to attach to this variable.
    """
    if dtype is None:
        dtype = config.floatX
@@ -857,8 +894,14 @@ lrow = TensorType('int64', (True, False))
 def row(name=None, dtype=None):
    """Return a symbolic row variable (ndim=2, broadcastable=[True,False]).
-    :param dtype: numeric type (None means to use theano.config.floatX)
-    :param name: a name to attach to this variable
+    Parameters
+    ----------
+    dtype : numeric type
+        None means to use theano.config.floatX.
+    name
+        A name to attach to this variable.
    """
    if dtype is None:
        dtype = config.floatX
@@ -878,8 +921,14 @@ lcol = TensorType('int64', (False, True))
 def col(name=None, dtype=None):
    """Return a symbolic column variable (ndim=2, broadcastable=[False,True]).
-    :param dtype: numeric type (None means to use theano.config.floatX)
-    :param name: a name to attach to this variable
+    Parameters
+    ----------
+    dtype : numeric type
+        None means to use theano.config.floatX.
+    name
+        A name to attach to this variable.
    """
    if dtype is None:
        dtype = config.floatX
@@ -899,8 +948,14 @@ ltensor3 = TensorType('int64', ((False,) * 3))
 def tensor3(name=None, dtype=None):
    """Return a symbolic 3-D variable.
-    :param dtype: numeric type (None means to use theano.config.floatX)
-    :param name: a name to attach to this variable
+    Parameters
+    ----------
+    dtype : numeric type
+        None means to use theano.config.floatX.
+    name
+        A name to attach to this variable.
    """
    if dtype is None:
        dtype = config.floatX
@@ -922,8 +977,14 @@ ltensor4 = TensorType('int64', ((False,) * 4))
 def tensor4(name=None, dtype=None):
    """Return a symbolic 4-D variable.
-    :param dtype: numeric type (None means to use theano.config.floatX)
-    :param name: a name to attach to this variable
+    Parameters
+    ----------
+    dtype : numeric type
+        None means to use theano.config.floatX.
+    name
+        A name to attach to this variable.
    """
    if dtype is None:
        dtype = config.floatX
@@ -957,6 +1018,7 @@ def _scal_elemwise_with_nfunc(nfunc, nin, nout):
    **destination** inputs it takes. That is, the function should
    take nin+nout inputs. nout == 0 means that the numpy function
    does not take a numpy array argument to put its result in.
    """
    def construct(symbol):
        symbolname = symbol.__name__
@@ -1183,7 +1245,9 @@ def cast(x, dtype):
 class MaxAndArgmax(Op):
-    """Calculate the max and argmax over a given axis or over all axes.
+    """
+    Calculate the max and argmax over a given axis or over all axes.
    """
    nin = 2  # tensor, axis
    nout = 2  # max val, max idx
@@ -1418,6 +1482,7 @@ def makeKeepDims(x, y, axis):
    Reintroduces in y with length one the axes of x which have been left out
    in a prior reduction of x. With this option, the resulting tensor will
    broadcast correctly against the original tensor x.
    """
    x = as_tensor_variable(x)
    y = as_tensor_variable(y)
@@ -1453,14 +1518,18 @@ def makeKeepDims(x, y, axis):
 def max_and_argmax(a, axis=None, keepdims=False):
    """
    Returns maximum elements and their indices obtained by iterating over
-    given axis
+    given axis.
    When axis is None (the default value), the max is performed
    over the flattened tensor.
-    keepdims: If this is set to True, the axes which are reduced are left in
+    Parameters
+    ----------
+    keepdims : bool
+        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the result
        will broadcast correctly against the original tensor.
    """
    out, argout = _max_and_argmax(a, axis)
@@ -1474,16 +1543,22 @@ def max_and_argmax(a, axis=None, keepdims=False):
 @constructor
 def max(x, axis=None, keepdims=False):
    """
-    Returns maximum elements obtained by iterating over given axis
+    Returns maximum elements obtained by iterating over given axis.
    When axis is None (the default value), the max is performed
    over the flattened tensor.
-    keepdims: If this is set to True, the axes which are reduced are left in
+    Parameters
+    ----------
+    keepdims : bool
+        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the result
        will broadcast correctly against the original tensor.
-    :note: we return an error as numpy when we reduce a dim with a shape of 0
+    Notes
+    -----
+    We return an error as numpy when we reduce a dim with a shape of 0.
    """
    # We have a choice of implementing this call with the
@@ -1511,14 +1586,18 @@ def max(x, axis=None, keepdims=False):
 @constructor
 def argmax(x, axis=None, keepdims=False):
    """
-    Returns indices of maximum elements obtained by iterating over given axis
+    Returns indices of maximum elements obtained by iterating over given axis.
    When axis is None (the default value), the argmax is performed
    over the flattened tensor.
-    keepdims: If this is set to True, the axes which are reduced are left in
+    Parameters
+    ----------
+    keepdims : bool
+        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the result
        will broadcast correctly against the original tensor.
    """
    # In python (using MaxAndArgmax.perform()) this leads to a wasteful
    # implementation that goes through the data twice instead of once
@@ -1534,14 +1613,18 @@ def argmax(x, axis=None, keepdims=False):
 @constructor
 def min(x, axis=None, keepdims=False):
    """
-    Returns minimum elements obtained by iterating over given axis
+    Returns minimum elements obtained by iterating over given axis.
    When axis is None (the default value), the min is performed
    over the flattened tensor.
-    keepdims: If this is set to True, the axes which are reduced are left in
+    Parameters
+    ----------
+    keepdims : bool
+        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the result
        will broadcast correctly against the original tensor.
    """
    x = as_tensor_variable(x)
    str_x_type = str(x.dtype)
@@ -1555,14 +1638,18 @@ def min(x, axis=None, keepdims=False):
 @constructor
 def argmin(x, axis=None, keepdims=False):
    """
-    Returns indices of minimum elements obtained by iterating over given axis
+    Returns indices of minimum elements obtained by iterating over given axis.
    When axis is None (the default value), the argmin is performed
    over the flattened tensor.
-    keepdims: If this is set to True, the axes which are reduced are left in
+    Parameters
+    ----------
+    keepdims : bool
+        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the result
        will broadcast correctly against the original tensor.
    """
    x = as_tensor_variable(x)
    str_x_type = str(x.dtype)
@@ -1579,6 +1666,7 @@ def smallest(*args):
    Return the [elementwise] smallest of a variable number of arguments.
    Like python's min.
    """
    if len(args) == 2:
        a, b = args
@@ -1593,6 +1681,7 @@ def largest(*args):
    Return the [elementwise] largest of a variable number of arguments.
    Like python's max.
    """
    if len(args) == 2:
        a, b = args
@@ -1647,31 +1736,34 @@ def isinf(a):
 def allclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
    """
-    Implements Numpy's ``allclose`` on tensors.
+    Implement Numpy's ``allclose`` on tensors.
    ``absolute(a - b) <= (atol + rtol * absolute(b))``
-    :note: Not a symmetric equation. See Numpy's documentation.
+    Parameters
+    ----------
-    :param a: input to compare
+    a : tensor
-    :type a: tensor
+        Input to compare.
+    b : tensor
-    :param b: input to compare
+        Input to compare.
-    :type b: tensor
+    rtol : float
+        The relative tolerance parameter.
-    :param rtol: the relative tolerance parameter
+    atol : float
-    :type rtol: float
+        The absolute tolerance parameter.
+    equal_nan : bool
+        Whether to consider nan's in the same place to be close.
-    :param atol: the absolute tolerance parameter
+    Returns
-    :type atol: float
+    -------
+    bool
+        A boolean value (of type int8, as returned by the tensor elementwise
+        `all` function) indicating whether all elements of a and b are within
+        the tolerance range defined above.
-    :param equal_nan: whether to consider nan's in the same place to be close
+    Notes
-    :type equal_nan: bool
+    -----
+    Not a symmetric equation. See Numpy's documentation.
-    :returns: a boolean value (of type int8 returned by the tensor
-            elementwise `all` function) whether all elements in a and b are in
-            the tolerance range defined above.
-    :rtype: int8
    """
    return all(isclose(a, b, rtol, atol, equal_nan))
@@ -1680,34 +1772,38 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
    """
    Implements Numpy's ``isclose`` on tensors.
-    The tolerance values are positive, typically very small numbers.  The
+    The tolerance values are positive, typically very small numbers. The
    relative difference (`rtol` * abs(`b`)) and the absolute difference
    `atol` are added together to compare against the absolute difference
    between `a` and `b`.
    ``absolute(a - b) <= (atol + rtol * absolute(b))``
-    :note: Not a symmetric equation. See Numpy's documentation.
+    Parameters
+    ----------
-    :param a: input to compare
+    a : tensor
-    :type a: tensor
+        Input to compare.
+    b : tensor
-    :param b: input to compare
+        Input to compare.
-    :type b: tensor
+    rtol : float
+        The relative tolerance parameter.
-    :param rtol: the relative tolerance parameter
+    atol : float
-    :type rtol: float
+        The absolute tolerance parameter.
+    equal_nan : bool
-    :param atol: the absolute tolerance parameter
+        Whether to consider nan's in the same place to be close.
-    :type atol: float
-    :param equal_nan: whether to consider nan's in the same place to be close
+    Returns
-    :type equal_nan: bool
+    -------
+    int8
+        A boolean (int8) array: 1 where `a` and `b` are element-wise equal
+        within the tolerance, 0 elsewhere.
-    :returns: returns a boolean (int8) array where two arrays are element-wise
+    Notes
-            equal within a tolerance.
+    -----
-    :rtype: int8
+    Not a symmetric equation. See Numpy's documentation.
+    Examples
+    --------
    >>> import theano
    >>> import numpy as np
    >>> a = theano._asarray([1e10, 1e-7], dtype="float64")
@@ -1738,6 +1834,7 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False):
    >>> b = theano._asarray([1.0, np.inf], dtype="float64")
    >>> theano.tensor.isclose(a, b).eval()
    array([1, 1], dtype==int8)
    """
    # close will be an int8 array of 1 where within tolerance
    # and 0 where not within tolerance or there was a nan or inf value.
@@ -2165,7 +2262,7 @@ class Nonzero(gof.Op):
    Returns
    -------
    result : matrix
-        matrix containing the indices of the non-zero elements of a.
+        Matrix containing the indices of the non-zero elements of a.
    See Also
    --------
@@ -2220,7 +2317,6 @@ def nonzero(a, return_matrix=False):
    ----------
    a : array_like
        Input array.
    return_matrix : bool
        If True, returns a symbolic matrix. If False, returns a tuple of
        arrays. Defaults to False.
@@ -2268,6 +2364,7 @@ def flatnonzero(a):
    --------
    nonzero : Return the indices of the non-zero elements of the input array.
    nonzero_values : Return the non-zero elements of the input array
    """
    if a.ndim == 0:
        raise ValueError('Nonzero only supports non-scalar arrays.')
@@ -2307,6 +2404,7 @@ def nonzero_values(a):
    nonzero : Return the indices of the non-zero elements of the input array.
    flatnonzero : Return the indices of the non-zero elements of the
        flattened input array.
    """
    return a.flatten()[flatnonzero(a)]
@@ -2365,6 +2463,7 @@ def tri(N, M=None, k=0, dtype=None):
    tri : Array of shape (N, M)
        Array with its lower triangle filled with ones and zero elsewhere;
        in other words ``T[i,j] == 1`` for ``i <= j + k``, 0 otherwise.
    """
    if dtype is None:
        dtype = config.floatX
@@ -2395,7 +2494,8 @@ def tril(m, k=0):
    See Also
    --------
-    triu : same thing, only for the upper triangle
+    triu : Same thing, only for the upper triangle.
    """
    return m * tri(m.shape[0], m.shape[1], k=k, dtype=m.dtype)
@@ -2411,7 +2511,8 @@ def triu(m, k=0):
    See Also
    --------
-    tril : lower triangle of an array
+    tril : Lower triangle of an array.
    """
    return m * (1 - tri(m.shape[0], m.shape[1], k=k - 1, dtype=m.dtype))
@@ -2456,21 +2557,22 @@ def eye(n, m=None, k=0, dtype=None):
    Parameters
    ----------
    n : int
-      Number of rows in the output.
+        Number of rows in the output.
    m : int, optional
-      Number of columns in the output. If None, defaults to `N`.
+        Number of columns in the output. If None, defaults to `N`.
    k : int, optional
-      Index of the diagonal: 0 (the default) refers to the main diagonal,
+        Index of the diagonal: 0 (the default) refers to the main diagonal,
-      a positive value refers to an upper diagonal, and a negative value
+        a positive value refers to an upper diagonal, and a negative value
-      to a lower diagonal.
+        to a lower diagonal.
    dtype : data-type, optional
-      Data-type of the returned array.
+        Data-type of the returned array.
    Returns
    -------
    I : ndarray of shape (N,M)
-      An array where all elements are equal to zero, except for the `k`-th
+        An array where all elements are equal to zero, except for the `k`-th
-      diagonal, whose values are equal to one.
+        diagonal, whose values are equal to one.
    """
    if dtype is None:
        dtype = config.floatX
@@ -2485,7 +2587,7 @@ def identity_like(x):
 class Alloc(gof.Op):
-    """Create a Tensor from an initial value and a desired shape
+    """Create a Tensor from an initial value and a desired shape.
    alloc(value, shape0, shape1, ..., shapeN)
@@ -2500,6 +2602,7 @@ class Alloc(gof.Op):
    This Op is used to replace fill() during optimizations because after shapes
    are lifted, the first argument to fill can often be pruned from the graph.
    """
    __props__ = ()
@@ -2642,6 +2745,7 @@ class Alloc(gof.Op):
        for size mismatches.
        If you always want an Alloc node, call make_node.
        """
        ret = super(Alloc, self).__call__(val, *shapes, **kwargs)
        try:
@@ -2709,18 +2813,22 @@ pprint.assign(tensor_copy, printing.IgnorePrinter())
 @constructor
 def sum(input, axis=None, dtype=None, keepdims=False, acc_dtype=None):
    """
-    Computes the sum along the given axis(es) of a tensor `input`
+    Computes the sum along the given axis(es) of a tensor `input`.
    When axis is None (the default value), the sum is performed
    over the flattened tensor.
-    keepdims: If this is set to True, the axes which are reduced are left in
-        the result as dimensions with size one. With this option, the result
-        will broadcast correctly against the original tensor.
    For full documentation see ``tensor.elemwise.Sum``.
    In particular please pay attention to the important warning when using
    a custom acc_dtype.
+    Parameters
+    ----------
+    keepdims : bool
+        If this is set to True, the axes which are reduced are left in
+        the result as dimensions with size one. With this option, the result
+        will broadcast correctly against the original tensor.
    """
    out = elemwise.Sum(axis=axis, dtype=dtype, acc_dtype=acc_dtype)(input)
@@ -2736,16 +2844,20 @@ pprint.assign(Sum(), printing.FunctionPrinter('sum'))
 def prod(input, axis=None, dtype=None, keepdims=False, acc_dtype=None,
         no_zeros_in_input=False):
    """
-    Computes the product along the given axis(es) of a tensor `input`
+    Computes the product along the given axis(es) of a tensor `input`.
    When axis is None (the default value), the product is performed
    over the flattened tensor.
-    keepdims: If this is set to True, the axes which are reduced are left in
+    For full documentation see ``tensor.elemwise.Prod``.
+    Parameters
+    ----------
+    keepdims : bool
+        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the result
        will broadcast correctly against the original tensor.
-    For full documentation see ``tensor.elemwise.Prod``.
    """
    out = elemwise.Prod(axis, dtype=dtype, acc_dtype=acc_dtype,
@@ -2803,31 +2915,32 @@ class Mean(elemwise.CAReduce):
 def mean(input, axis=None, dtype=None, op=False, keepdims=False,
         acc_dtype=None):
    """
-    Computes the mean value along the given axis(es) of a tensor `input`
+    Computes the mean value along the given axis(es) of a tensor `input`.
-    :param axis: compute the mean along this axis of the tensor.
+    Parameters
-                 None means all axes (like numpy).
+    ----------
-    :type axis: None or int or (list of int) (see `Sum`)
+    axis : None or int or (list of int) (see `Sum`)
+        Compute the mean along this axis of the tensor.
-    :param dtype: dtype to cast the result of the inner summation into.
+        None means all axes (like numpy).
+    dtype : None or string
+        Dtype to cast the result of the inner summation into.
        For instance, by default, a sum of a float32 tensor will be
        done in float64 (acc_dtype would be float64 by default),
        but that result will be casted back in float32.
-    :type dtype: None or string
+    keepdims : bool
+        If this is set to True, the axes which are reduced are
-    :param keepdims: If this is set to True, the axes which are reduced are
        left in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original tensor.
+    acc_dtype : None or string
+        Dtype to use for the inner summation. This will not
+        necessarily be the dtype of the output (in particular
+        if it is a discrete (int/uint) dtype, the output will
+        be in a float type). If None, then we use the same rules as `sum()`.
-    :param acc_dtype: dtype to use for the inner summation. This will not
+    Notes
-                  necessarily be the dtype of the output (in particular
+    -----
-                  if it is a discrete (int/uint) dtype, the output will
+    For gpu, if you specify dtype=float32, everything will be done on the gpu.
-                  be in a float type).
-                  If None, then we use the same rules as `sum()`.
-    :type acc_dtype: None or string
-    :note: for gpu, if you specify dtype=float32, everything will be done
-           on the gpu.
    """
    if op:
@@ -2896,18 +3009,23 @@ def var(input, axis=None, keepdims=False):
    """
    Computes the variance along the given axis(es) of a tensor `input`.
-    :param axis: Compute the variance along this axis of the tensor.
+    Parameters
-                 None means all axes (like numpy).
+    ----------
-    :type axis: None or int or (list of int) (see `Sum`)
+    axis : None or int or (list of int) (see `Sum`)
+        Compute the variance along this axis of the tensor.
-    :param keepdims: If this is set to True, the axes which are reduced are
+        None means all axes (like numpy).
+    keepdims : bool
+        If this is set to True, the axes which are reduced are
        left in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original tensor.
-    :note: It uses the two-pass algorithm for more stable results.
+    Notes
-        https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
+    -----
-        There exist other implementations that are even more stable, but
+    It uses the two-pass algorithm for more stable results.
-        probably slower.
+    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
+    There exist other implementations that are even more stable, but probably
+    slower.
    """
    input_ndim = input.type.ndim
@@ -2933,26 +3051,26 @@ def var(input, axis=None, keepdims=False):
 @constructor
 def std(input, axis=None, keepdims=False):
    """
-    Computes the standard deviation along the given axis(es)
+    Computes the standard deviation along the given axis(es) of a tensor `input`.
-    of a tensor `input`.
-    :param axis: Compute the standard deviation along this
+    Parameters
-                axis of the tensor.
+    ----------
-                 None means all axes (like numpy).
+    axis : None or int or (list of int) (see `Sum`)
-    :type axis: None or int or (list of int) (see `Sum`)
+        Compute the standard deviation along this axis of the tensor.
+        None means all axes (like numpy).
+    keepdims : bool
+        If this is set to True, the axes which are reduced are left in the
+        result as dimensions with size one. With this option, the result will
+        broadcast correctly against the original tensor.
-    :param keepdims: If this is set to True, the axes
+    Notes
-        which are reduced are
+    -----
-        left in the result as dimensions with size one.
+    It calls `var()` and `var()` uses the two-pass algorithm for more stable
-        With this option,
+    results.
-        the result will broadcast correctly against the
+    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
-        original tensor.
+    There exist other implementations that are even more stable, but probably
+    slower.
-    :note: It calls `var()` and `var()` uses the two-pass algorithm for more
-        stable results.
-        https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
-        There exist other implementations that are even more stable, but
-        probably slower.
    """
    return sqrt(var(input=input, axis=axis, keepdims=keepdims))
@@ -2960,10 +3078,12 @@ def std(input, axis=None, keepdims=False):
 class Default(gof.Op):
    """
-    Takes an input x and a default value. If the input is not None, a
+    Takes an input x and a default value.
-    reference to it is returned. If the input is None, a copy of the
-    default value is returned instead. The input and the default must
+    If the input is not None, a reference to it is returned.
-    have exactly the same type.
+    If the input is None, a copy of the default value is returned instead.
+    The input and the default must have exactly the same type.
    """
    view_map = {0: [0]}
    __props__ = ()
@@ -2994,15 +3114,13 @@ setdefault = default  # legacy
 ##########################
 @_scal_elemwise_with_nfunc('maximum', 2, 1)
 def maximum(x, y):
-    """elemwise maximum. See max for the maximum in one tensor
+    """elemwise maximum. See max for the maximum in one tensor"""
-    """
    # see decorator for function body
 @_scal_elemwise_with_nfunc('minimum', 2, 1)
 def minimum(x, y):
-    """elemwise minimum. See min for the minimum in one tensor
+    """elemwise minimum. See min for the minimum in one tensor"""
-    """
    # see decorator for function body
@@ -3058,6 +3176,7 @@ def ceil_intdiv(a, b):
    Safely compute ceil(float_division(a, b)).
    Works for all dtypes, but mostly useful when a and b are int.
    """
    # If a and b are int with not many significant bits, we could
    # cast them to float to avoid doing the modulo. We do not know if this
@@ -3099,13 +3218,17 @@ def pow(a, b):
 # So we do not use @scal_elemwise_with_nfunc('clip', 3, 1)
 @_scal_elemwise
 def clip(x, min, max):
-    """clip x to be between min and max.
+    """
+    Clip x to be between min and max.
+    Notes
+    -----
+    When `x` is equal to the boundaries, the output is considered
+    to be `x`, so at these points, the gradient of the cost wrt the output
+    will be propagated to `x`, not to `min` nor `max`. In other words,
+    on these points, the gradient wrt `x` will be equal to the gradient wrt
+    the output, and the gradient wrt `min` and `max` will be zero.
-    :note: When `x` is equal to the boundaries, the output is considered
-        to be `x`, so at these points, the gradient of the cost wrt the output
-        will be propagated to `x`, not to `min` nor `max`. In other words,
-        on these points, the gradient wrt `x` will be equal to the gradient wrt
-        the output, and the gradient wrt `min` and `max` will be zero.
    """
    # see decorator for function body
    # for grep: clamp, bound
@@ -3125,14 +3248,16 @@ pprint.assign(pow, printing.OperatorPrinter('**', 1, 'right'))
 def extract_constant(x, elemwise=True):
-    '''
+    """
-     This function is basically a call to tensor.get_scalar_constant_value. The
+    This function is basically a call to tensor.get_scalar_constant_value.
-     main difference is the behaviour in case of failure. While
-     get_scalar_constant_value raises an TypeError, this function returns x,
+    The main difference is the behaviour in case of failure. While
-     as a tensor if possible. If x is a ScalarVariable from a
+    get_scalar_constant_value raises a TypeError, this function returns x,
-     scalar_from_tensor, we remove the conversion. If x is just a
+    as a tensor if possible. If x is a ScalarVariable from a
-     ScalarVariable, we convert it to a tensor with tensor_from_scalar.
+    scalar_from_tensor, we remove the conversion. If x is just a
-    '''
+    ScalarVariable, we convert it to a tensor with tensor_from_scalar.
+    """
    try:
        x = get_scalar_constant_value(x, elemwise=elemwise)
    except NotScalarConstantError:
@@ -3150,8 +3275,7 @@ def transpose(x, axes=None):
    """
    Reorder the dimensions of x. (Default: reverse them)
-    This is a macro around dimshuffle that matches the numpy.transpose
+    This is a macro around dimshuffle that matches the numpy.transpose function.
-    function.
    """
    if axes is None:
@@ -3164,18 +3288,33 @@ def transpose(x, axes=None):
 def batched_dot(x, y):
    """
-    :param x: A Tensor with sizes e.g.: for  3D (dim1, dim3, dim2)
-    :param y: A Tensor with sizes e.g.: for 3D (dim1, dim2, dim4)
    This function computes the dot product between the two tensors, by
    iterating over the first dimension using scan.
-    Returns a tensor of size e.g. if it is 3D: (dim1, dim3, dim4)
-    Example:
+    Parameters
+    ----------
+    x : tensor
+        A Tensor with sizes e.g.: for 3D (dim1, dim3, dim2).
+    y : tensor
+        A Tensor with sizes e.g.: for 3D (dim1, dim2, dim4).
+    Returns
+    -------
+    tensor
+        A tensor of size e.g. if it is 3D: (dim1, dim3, dim4).
+    Notes
+    -----
+    This is a subset of numpy.einsum, but we do not provide it for now.
+    But numpy einsum is slower than dot or tensordot:
+    http://mail.scipy.org/pipermail/numpy-discussion/2012-October/064259.html
+    Examples
+    --------
    >>> first = tensor.tensor3('first')
    >>> second = tensor.tensor3('second')
    >>> result = batched_dot(first, second)
-    :note:  This is a subset of numpy.einsum, but we do not provide it for now.
-    But numpy einsum is slower than dot or tensordot:
-    http://mail.scipy.org/pipermail/numpy-discussion/2012-October/064259.html
    """
    result, updates = theano.scan(
        fn=lambda x_mat, y_mat:
@@ -3188,11 +3327,22 @@ def batched_dot(x, y):
 def batched_tensordot(x, y, axes=2):
    """
-    :param x: A Tensor with sizes e.g.: for 3D (dim1, dim3, dim2)
+    Compute the tensordot product.
-    :param y: A Tensor with sizes e.g.: for 3D (dim1, dim2, dim4)
-    :param axes: an integer or array. If an integer, the number of axes
+    A hybrid of batched_dot and tensordot, this function computes the
-        to sum over. If an array, it must have two array
+    tensordot product between the two tensors, by iterating over the
-        elements containing the axes to sum over in each tensor.
+    first dimension using scan to perform a sequence of tensordots.
+    Parameters
+    ----------
+    x : tensor
+        A Tensor with sizes e.g.: for 3D (dim1, dim3, dim2)
+    y : tensor
+        A Tensor with sizes e.g.: for 3D (dim1, dim2, dim4)
+    axes : int or array-like of length 2
+        If an integer, the number of axes to sum over.
+        If an array, it must have two array elements containing the axes to sum
+        over in each tensor.
        If an integer i, it is converted to an array containing
        the last i dimensions of the first tensor and the first
@@ -3206,11 +3356,7 @@ def batched_tensordot(x, y, axes=2):
        (Remember axes are zero-indexed!) The 2nd axis of a and the
        3rd axis of b must have the same shape; the same is true for
        the 3rd axis of a and the 5th axis of b.
-    :type axes: int or array-like of length 2
-    A hybrid of batch_dot and tensordot, this function computes the
-    tensordot product between the two tensors, by iterating over the
-    first dimension using scan to perform a sequence of tensordots.
    """
    if isinstance(axes, (list, numpy.ndarray)):
        if isinstance(axes, list):
@@ -3239,20 +3385,17 @@ def split(x, splits_size, n_splits, axis=0):
 class Split(Op):
    """Partition a `TensorVariable` along some axis.
-    .. python::
+    Examples
+    --------
-        x = vector()
+    >>> x = vector()
-        splits = lvector()
+    >>> splits = lvector()
-        # you have to declare right away how many split_points there will be.
+    You have to declare right away how many split_points there will be.
-        ra, rb, rc = split(x, splits, n_splits = 3, axis = 0)
+    >>> ra, rb, rc = split(x, splits, n_splits = 3, axis = 0)
+    >>> f = function([x, splits], [ra, rb, rc])
-        f = function([x, splits], [ra, rb, rc])
+    >>> a, b, c = f([0,1,2,3,4,5], [3, 2, 1])
+    a == [0,1,2]
-        a, b, c = f([0,1,2,3,4,5], [3, 2, 1])
+    b == [3, 4]
+    c == [5]
-        #a == [0,1,2]
-        #b == [3, 4]
-        #c == [5]
    """
@@ -3370,6 +3513,7 @@ class Split(Op):
 def addbroadcast(x, *axes):
    """
    Make the input broadcastable in the specified axes.
    For example, addbroadcast(x, 0) will make the first dimension of
    x broadcastable. When performing the function, if the length of
    x along that dimension is not 1, a ValueError will be raised.
@@ -3377,20 +3521,19 @@ def addbroadcast(x, *axes):
    We apply the opt here not to pollute the graph especially during
    the gpu optimization
-    Parameters:
+    Parameters
-    ------------
-        x : tensor_like
-            Input theano tensor.
-        axis : an int or an iterable object such as list or tuple
-               of int values
-               The dimension along which the tensor x should be
-               broadcastable.  if the length of x along these
-               dimensions is not 1, a ValueError will be raised.
-    returns:
    ----------
-        a theano tensor, which is broadcastable along the specified dimensions.
+    x : tensor_like
+        Input theano tensor.
+    axis : an int or an iterable object such as list or tuple of int values
+        The dimension along which the tensor x should be broadcastable.
+        If the length of x along these dimensions is not 1, a ValueError will
+        be raised.
+    Returns
+    -------
+    tensor
+        A theano tensor, which is broadcastable along the specified dimensions.
    """
    rval = Rebroadcast(*[(axis, True) for axis in axes])(x)
@@ -3400,6 +3543,7 @@ def addbroadcast(x, *axes):
 def unbroadcast(x, *axes):
    """
    Make the input impossible to broadcast in the specified axes.
    For example, addbroadcast(x, 0) will make the first dimension
    of x broadcastable. When performing the function, if the length
    of x along that dimension is not 1, a ValueError will be raised.
@@ -3407,20 +3551,19 @@ def unbroadcast(x, *axes):
    We apply the opt here not to pollute the graph especially during
    the gpu optimization
-    Parameters:
+    Parameters
-    ------------
-        x : tensor_like
-            Input theano tensor.
-        axis : an int or an iterable object such as list or tuple
-               of int values
-               The dimension along which the tensor x should be
-               unbroadcastable.  if the length of x along these
-               dimensions is not 1, a ValueError will be raised.
-    returns:
    ----------
-        a theano tensor, which is unbroadcastable along the specified dimensions.
+    x : tensor_like
+        Input theano tensor.
+    axis : an int or an iterable object such as list or tuple of int values
+        The dimension along which the tensor x should be unbroadcastable.
+        If the length of x along these dimensions is not 1, a ValueError will
+        be raised.
+    Returns
+    -------
+    tensor
+        A theano tensor, which is unbroadcastable along the specified dimensions.
    """
    rval = Rebroadcast(*[(axis, False) for axis in axes])(x)
@@ -3430,7 +3573,8 @@ def unbroadcast(x, *axes):
 def patternbroadcast(x, broadcastable):
    """
    Make the input adopt a specific broadcasting pattern.
-    broadcastable must be iterable. For example,
+    `broadcastable` must be iterable. For example,
    patternbroadcast(x, (True, False)) will make the first
    dimension of x broadcastable and the second dimension
    not broadcastable, so x will now be a row.
@@ -3438,21 +3582,20 @@ def patternbroadcast(x, broadcastable):
    We apply the opt here not to pollute the graph especially during the gpu
    optimization.
-    Parameters:
+    Parameters
-    ------------
+    ----------
-        x : tensor_like
+    x : tensor_like
-            Input theano tensor.
+        Input theano tensor.
-        broadcastable : an iterable object such as list or tuple
+    broadcastable : an iterable object such as list or tuple of bool values
-                        of bool values
+        A set of boolean values indicating whether a dimension should be
+        broadcastable or not. If the length of x along these dimensions is
+        not 1, a ValueError will be raised.
-            a set of boolean values indicating whether a dimension
+    Returns
-            should be broadcastable or not.
+    -------
-            if the length of x along these dimensions is not 1,
+    tensor
-            a ValueError will be raised.
+        A theano tensor, which has the specified broadcastable pattern.
-    returns:
-    ----------
-        a theano tensor, which is unbroadcastable along the specified dimensions.
    """
    rval = Rebroadcast(*[(i, broadcastable[i])
                         for i in xrange(len(broadcastable))])(x)
@@ -3468,31 +3611,39 @@ class Join(Op):
    Of course, TensorVariable instances do not have a shape, so this error
    cannot be caught until runtime.  See `perform()`.
-    For joins involving scalar values, see @stack.
+    See Also
+    --------
+    stack : For joins involving scalar values
-    .. python::
+    Examples
+    --------
+    >>> x, y, z = tensor.matrix(), tensor.matrix(), tensor.matrix()
+    >>> u = tensor.vector()
-        x, y, z = tensor.matrix(), tensor.matrix(), tensor.matrix()
+    >>> r = join(0, x, y, z)
-        u = tensor.vector()
+    >>> c = join(1, x, y, z)
+    >>> join(2, x, y, z)    # WRONG: the axis has to be an index into the shape
+    >>> join(0, x, u)       # WRONG: joined tensors must have the same rank
-        r = join(0, x, y, z)
-        c = join(1, x, y, z)
-        join(2, x, y, z)    # WRONG: the axis has to be an index into the shape
-        join(0, x, u)       # WRONG: joined tensors must have the same rank
    """
    check_input = False
    __props__ = ()
    def make_node(self, *axis_and_tensors):
        """
-        :param axis: an Int or integer-valued Variable
+        Parameters
+        ----------
-        :param tensors: a variable number (but not zero) of tensors to
+        axis : an Int or integer-valued Variable
-          concatenate along the specified axis.  These tensors must have
+        tensors
-          the same shape along all dimensions other than this axis.
+            A variable number (but not zero) of tensors to
+            concatenate along the specified axis.  These tensors must have
-        :returns: a symbolic Variable.  It has the same ndim as the
+            the same shape along all dimensions other than this axis.
-            input tensors, and the most inclusive dtype.
+        Returns
+        -------
+        A symbolic Variable
+            It has the same ndim as the input tensors, and the most inclusive
+            dtype.
        """
        axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
@@ -3709,26 +3860,25 @@ class Join(Op):
 """
    Convenience function to concatenate `TensorType`s along the given axis.
-    :Parameters:
+    Parameters
-     - `tensors` : list of tensors (or list-like)
+    ----------
-       A list of tensors to be concatenated along the given axis.
+    tensors : list of tensors (or list-like)
-     - `axis` : int (symbolic or literal)
+        A list of tensors to be concatenated along the given axis.
+        The shapes of the tensors to be concatenated must be all
-       On which dimension should the tensors be joined?  The `axis`
+        identical, except in the dimension (`axis`) on which they are to
-       must be a valid index into the shape of the tensors to be
+        be joined.
-       concatenated.
+    axis : int (symbolic or literal)
+        On which dimension should the tensors be joined?  The `axis`
-       The `axis` parameter may either be an integer or an object that
+        must be a valid index into the shape of the tensors to be
-       can be converted to a scalar using `as_scalar`(`axis`). In the
+        concatenated.
-       former case, the axis is fixed at construction, while in the
+        The `axis` parameter may either be an integer or an object that
-       latter it may vary over time depending on the value of the
+        can be converted to a scalar using `as_scalar`(`axis`). In the
-       `axis` variable.
+        former case, the axis is fixed at construction, while in the
+        latter it may vary over time depending on the value of the
+        `axis` variable.
-    The shapes of the tensors to be concatenated must be all
+"""
-    identical, except in the dimension (`axis`) on which they are to
-    be joined.
-    """
 join = Join()
 pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Join),
@@ -3738,7 +3888,8 @@ pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Join),
 def roll(x, shift, axis=None):
    """
    Convenience function to roll `TensorType`s along the given axis.
-    Syntax copies numpy.roll function
+    Syntax copies numpy.roll function.
    Parameters
    ----------
@@ -3746,7 +3897,7 @@ def roll(x, shift, axis=None):
        Input tensor.
    shift : int (symbolic or literal)
        The number of places by which elements are shifted.
-    axis : int (symbolic or literal) (optional)
+    axis : int (symbolic or literal), optional
        The axis along which elements are shifted. By default, the array
        is flattened before shifting, after which the original
        shape is restored.
@@ -3755,6 +3906,7 @@ def roll(x, shift, axis=None):
    -------
    res : tensor
        Output tensor, with the same shape as `x`.
    """
    if axis is None:
        if x.ndim > 1:
@@ -3780,9 +3932,13 @@ def roll(x, shift, axis=None):
 @constructor
 def shape_padleft(t, n_ones=1):
-    """Reshape `t` by left-padding the shape with `n_ones` 1s
+    """Reshape `t` by left-padding the shape with `n_ones` 1s.
+    See Also
+    --------
+    shape_padright
+    Dimshuffle
-    See also: `shape_padright` and `Dimshuffle`
    """
    _t = as_tensor_variable(t)
@@ -3792,9 +3948,13 @@ def shape_padleft(t, n_ones=1):
 @constructor
 def shape_padright(t, n_ones=1):
-    """Reshape `t` by right-padding the shape with `n_ones` 1s
+    """Reshape `t` by right-padding the shape with `n_ones` 1s.
+    See Also
+    --------
+    shape_padleft
+    Dimshuffle
-    See also: `shape_padleft` and `Dimshuffle`
    """
    _t = as_tensor_variable(t)
@@ -3808,6 +3968,7 @@ def stack(*tensors):
    The size in dimension 0 of the result will be equal to the number
    of tensors passed.
    """
    if len(tensors) == 0:
        raise Exception('theano.tensor.stack(*tensors) must have at least'
@@ -3843,9 +4004,10 @@ def concatenate(tensor_list, axis=0):
    This function is similar to `join`, but uses the signature of
    numpy's concatenate function.
-    This function
+    Raises
-    :Exceptions:
+    ------
-     - `TypeError` : the tensor_list must be a tuple or list
+    TypeError
+        The tensor_list must be a tuple or list.
    """
    # Check someone did not make the common mistake to do something like:
@@ -3863,16 +4025,20 @@ def concatenate(tensor_list, axis=0):
 def get_vector_length(v):
    """Return the run-time length of a symbolic vector.
-    :Parameters:
+    Parameters
-     - `v` : A rank-1 TensorType variable.
+    ----------
+    v
-    :Exceptions:
+        A rank-1 TensorType variable.
-     - `TypeError` : `v` hasn't the proper type.
-     - `ValueError` : No special case applies, the length is not known.
-    In general this is not possible, but for a number of special cases
+    Raises
-    the length can be determined at compile / graph-construction time.
+    ------
-    This function implements these special cases.
+    TypeError
+        `v` does not have the proper type.
+    ValueError
+        No special case applies, the length is not known.
+        In general this is not possible, but for a number of special cases
+        the length can be determined at compile / graph-construction time.
+        This function implements these special cases.
    """
    v = as_tensor_variable(v)
@@ -3909,9 +4075,11 @@ def get_vector_length(v):
 def horizontal_stack(*args):
    """
    Horizontally stack two L{TensorType}s.
    Stack two L{TensorType}s along the second axis (column wise). These
    L{TensorType}s must have the same shape along all dimensions but the
    second.
    """
    # Note: 'horizontal_stack' and 'vertical_stack' do not behave exactly like
    # Numpy's hstack and vstack functions. This is intended, because Numpy's
@@ -3937,7 +4105,9 @@ class Reshape(Op):
    """Perform a reshape operation of the input x to the new shape shp.
    The number of dimensions to which to reshape to (ndim) must be
-    known at graph build time."""
+    known at graph build time.
+    """
    view_map = {0: [0]}  # output 0 is potentially aliased to inputs [0]
    _f16_ok = True
@@ -4131,8 +4301,11 @@ def reshape(x, newshape, ndim=None, name=None):
 class Flatten(Op):
    """
+    Flatten a tensor.
    Flattens a tensor to `outdim` dimensions by preserving the leading
    outdim - 1 shape components.
    """
    view_map = {0: [0]}
@@ -4305,16 +4478,19 @@ def flatten(x, outdim=1):
 class Tile(Op):
    """
-    DEPRECATED: use tile() instead.
    Construct an array by repeating the input x according to reps pattern.
+    .. note:: Deprecated
+              Use tile() instead.
    Tiles its input according to reps. The length of reps is the number of
    dimension of x and contains the number of times to tile x in each
    dimension.
-    :see: `numpy.tile
+    See Also
-    <http://docs.scipy.org/doc/numpy/reference/generated/numpy.tile.html>`_
+    --------
+    numpy.tile : http://docs.scipy.org/doc/numpy/reference/generated/numpy.tile.html
    """
    __props__ = ("ndim",)
@@ -4377,13 +4553,15 @@ class Tile(Op):
 def tile(x, reps, ndim=None):
    """
-    Tile input array `x` according to `reps`. See the docstring of `numpy.tile`
+    Tile input array `x` according to `reps`.
-    for details.
+    See the docstring of `numpy.tile` for details.
    Currently, x.ndim and len(reps) must be equal, and, if specified, 'ndim'
    must be equal to both.
    TODO: expand this.
    """
    try:
@@ -4420,6 +4598,7 @@ class ARange(Op):
    """Create an array containing evenly spaced values within a given interval.
    Parameters and behaviour are the same as numpy.arange().
    """
    __props__ = ("dtype",)
@@ -4550,13 +4729,13 @@ class _nd_grid(object):
    to their numpy equivalents.
    Parameters
-    ==========
+    ----------
-        sparse : boolean, optional, default=True
+    sparse : boolean, optional, default=True
-            Specifying False leads to the equivalent of numpy's mgrid
+        Specifying False leads to the equivalent of numpy's mgrid functionality.
-            functionality. Specifying True leads to the equivalent of ogrid.
+        Specifying True leads to the equivalent of ogrid.
    Examples
-    ========
+    --------
    >>> a = T.mgrid[0:5, 0:3]
    >>> a[0].eval()
    array([[0, 0, 0],
@@ -4570,7 +4749,6 @@ class _nd_grid(object):
           [0, 1, 2],
           [0, 1, 2],
           [0, 1, 2]], dtype=int8)
    >>> b = T.ogrid[0:5, 0:3]
    >>> b[0].eval()
    array([[0],
@@ -4580,6 +4758,7 @@ class _nd_grid(object):
           [4]], dtype=int8)
    >>> b[1].eval()
    array([[0, 1, 2, 3]], dtype=int8)
    """
    def __init__(self, sparse=False):
@@ -4689,11 +4868,19 @@ class PermuteRowElements(Op):
        The terminal case is reached when the current tensors are vector,
        then the permutation contained in y is applied to x.
-        :param x: The input tensor, on which the permutation is applied
+        Parameters
-        :param y: Tensor containing the permutations to apply
+        ----------
-        :param out: Tensor storing the output result
+        x : tensor
-        :param curdim: Counter of the current depth of recursion
+            The input tensor, on which the permutation is applied.
-        :param inverse: Wether to apply permutations or their inverse
+        y : tensor
+            Tensor containing the permutations to apply.
+        out : tensor
+            Tensor storing the output result.
+        curdim : int
+            Counter of the current depth of recursion.
+        inverse
+            Whether to apply permutations or their inverse.
        """
        if len(x.shape) == 1:
            # Numpy advanced indexing works in this case
@@ -4817,7 +5004,9 @@ def permute_row_elements(x, y, inverse=0):
 def inverse_permutation(perm):
    """Computes the inverse of permutations.
    Each row of input should contain a permutation of the first integers.
    """
    return permute_row_elements(
        arange(perm.shape[-1], dtype=perm.dtype),
@@ -4840,14 +5029,14 @@ class Dot(Op):
    equivalent to matrix multiplication. For two vectors, this is the inner
    product.
-    :note: matrix-matrix products are sometimes optimized to Dot22 or Gemm ops.
+    Notes
-    (see tensor.blas)
+    -----
+    Matrix-matrix products are sometimes optimized to Dot22 or Gemm ops
-    :note: vector-vector products are sometimes optimized to Ger or CGer.  (see
+    (see tensor.blas).
-    tensor.blas)
+    Vector-vector products are sometimes optimized to Ger or CGer (see
+    tensor.blas).
-    :note: matrix-vector products are sometimes optimized to Gemv, CGemv (see
+    Matrix-vector products are sometimes optimized to Gemv, CGemv (see
-    tensor.blas)
+    tensor.blas).
    """
    __props__ = ()
@@ -5031,10 +5220,12 @@ pprint.assign(_dot, printing.OperatorPrinter(printing.special['middle_dot'],
 def dot(a, b):
    """
-    Computes the dot product of two variables. For two matrices, this is
+    Computes the dot product of two variables.
-    equivalent to matrix multiplication. For two vectors, this is the inner
-    product. When one variable is a scalar, this is like elementwise
+    For two matrices, this is equivalent to matrix multiplication.
-    multiplication.  For N dimensions, this is a sum product over the last axis
+    For two vectors, this is the inner product.
+    When one variable is a scalar, this is like elementwise multiplication.
+    For N dimensions, this is a sum product over the last axis
    of the first array and the second-to-last axis of the second array:
        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
@@ -5054,14 +5245,14 @@ def dot(a, b):
        3.  If both a and b have either 1 or 2 dimensions, it calls Theano's
            Dot op on a and b.
-    :note: matrix-matrix products are sometimes optimized to Dot22 or Gemm ops.
+    Notes
-    (see tensor.blas)
+    -----
+    Matrix-matrix products are sometimes optimized to Dot22 or Gemm ops
-    :note: vector-vector products are sometimes optimized to Ger or CGer.  (see
+    (see tensor.blas).
-    tensor.blas)
+    Vector-vector products are sometimes optimized to Ger or CGer (see
+    tensor.blas).
-    :note: matrix-vector products are sometimes optimized to Gemv, CGemv (see
+    Matrix-vector products are sometimes optimized to Gemv, CGemv (see
-    tensor.blas)
+    tensor.blas).
    """
    a, b = as_tensor_variable(a), as_tensor_variable(b)
@@ -5080,46 +5271,52 @@ def dot(a, b):
 def tensordot(a, b, axes=2):
    """
-    Given two tensors a and b,tensordot computes a generalized dot product over
+    Compute a generalized dot product over provided axes.
+    Given two tensors a and b, tensordot computes a generalized dot product over
    the provided axes. Theano's implementation reduces all expressions to
    matrix or vector dot products and is based on code from Tijmen Tieleman's
    gnumpy (http://www.cs.toronto.edu/~tijmen/gnumpy.html).
-    :param a: the first tensor variable
+    Parameters
-    :type a: symbolic tensor
+    ----------
+    a : symbolic tensor
-    :param b: the second tensor variable
+        The first tensor variable.
-    :type b: symbolic tensor
+    b : symbolic tensor
+        The second tensor variable.
-    :param axes: an integer or array. If an integer, the number of axes
+    axes : int or array-like of length 2
-                 to sum over. If an array, it must have two array
+        If an integer, the number of axes to sum over.
-                 elements containing the axes to sum over in each tensor.
+        If an array, it must have two array elements containing the axes
+        to sum over in each tensor.
-                 Note that the default value of 2 is not guaranteed to work
-                 for all values of a and b, and an error will be raised if
+        Note that the default value of 2 is not guaranteed to work
-                 that is the case. The reason for keeping the default is to
+        for all values of a and b, and an error will be raised if
-                 maintain the same signature as numpy's tensordot function
+        that is the case. The reason for keeping the default is to
-                 (and np.tensordot raises analogous errors for non-compatible
+        maintain the same signature as numpy's tensordot function
-                 inputs).
+        (and np.tensordot raises analogous errors for non-compatible
+        inputs).
-                 If an integer i, it is converted to an array containing
-                 the last i dimensions of the first tensor and the first
-                 i dimensions of the second tensor:
-                     axes = [list(range(a.ndim - i, b.ndim)), list(range(i))]
-                 If an array, its two elements must contain compatible axes
-                 of the two tensors. For example, [[1, 2], [2, 0]] means sum
-                 over the 2nd and 3rd axes of a and the 3rd and 1st axes of b.
-                 (Remember axes are zero-indexed!) The 2nd axis of a and the
-                 3rd axis of b must have the same shape; the same is true for
-                 the 3rd axis of a and the 1st axis of b.
-    :type axes: int or array-like of length 2
-    :returns: a tensor with shape equal to the concatenation of a's shape
-              (less any dimensions that were summed over) and b's shape
-              (less any dimensions that were summed over).
-    :rtype: symbolic tensor
+        If an integer i, it is converted to an array containing
+        the last i dimensions of the first tensor and the first
+        i dimensions of the second tensor:
+            axes = [list(range(a.ndim - i, a.ndim)), list(range(i))]
+        If an array, its two elements must contain compatible axes
+        of the two tensors. For example, [[1, 2], [2, 0]] means sum
+        over the 2nd and 3rd axes of a and the 3rd and 1st axes of b.
+        (Remember axes are zero-indexed!) The 2nd axis of a and the
+        3rd axis of b must have the same shape; the same is true for
+        the 3rd axis of a and the 1st axis of b.
+    Returns
+    -------
+    symbolic tensor
+        A tensor with shape equal to the concatenation of a's shape
+        (less any dimensions that were summed over) and b's shape
+        (less any dimensions that were summed over).
+    Examples
+    --------
    It may be helpful to consider an example to see what tensordot does.
    Theano's implementation is identical to NumPy's. Here a has shape (2, 3, 4)
    and b has shape (5, 6, 4, 3). The axes to sum over are [[1, 2], [3, 2]] --
@@ -5127,29 +5324,30 @@ def tensordot(a, b, axes=2):
    are compatible. The resulting tensor will have shape (2, 5, 6) -- the
    dimensions that are not being summed:
-        a = np.random.random((2,3,4))
+    >>> a = np.random.random((2,3,4))
-        b = np.random.random((5,6,4,3))
+    >>> b = np.random.random((5,6,4,3))
-        #tensordot
+    #tensordot
-        c = np.tensordot(a, b, [[1,2],[3,2]])
+    >>> c = np.tensordot(a, b, [[1,2],[3,2]])
-        #loop replicating tensordot
+    #loop replicating tensordot
-        a0, a1, a2 = a.shape
+    >>> a0, a1, a2 = a.shape
-        b0, b1, _, _ = b.shape
+    >>> b0, b1, _, _ = b.shape
-        cloop = np.zeros((a0,b0,b1))
+    >>> cloop = np.zeros((a0,b0,b1))
-        #loop over non-summed indices -- these exist
+    #loop over non-summed indices -- these exist
-        #in the tensor product.
+    #in the tensor product.
-        for i in range(a0):
+    >>> for i in range(a0):
-            for j in range(b0):
+    ...     for j in range(b0):
-                for k in range(b1):
+    ...         for k in range(b1):
-                    #loop over summed indices -- these don't exist
+    ...             #loop over summed indices -- these don't exist
-                    #in the tensor product.
+    ...             #in the tensor product.
-                    for l in range(a1):
+    ...             for l in range(a1):
-                        for m in range(a2):
+    ...                 for m in range(a2):
-                            cloop[i,j,k] += a[i,l,m] * b[j,k,m,l]
+    ...                     cloop[i,j,k] += a[i,l,m] * b[j,k,m,l]
-        np.allclose(c, cloop) #true
+    >>> np.allclose(c, cloop)
+    True
    This specific implementation avoids a loop by transposing a and b such that
    the summed axes of a are last and the summed axes of b are first. The
@@ -5160,12 +5358,16 @@ def tensordot(a, b, axes=2):
    In an extreme case, no axes may be specified. The resulting tensor
    will have shape equal to the concatenation of the shapes of a and b:
-        c = np.tensordot(a, b, 0)
+    >>> c = np.tensordot(a, b, 0)
-        print(a.shape) #(2,3,4)
+    >>> print(a.shape)
-        print(b.shape) #(5,6,4,3)
+    (2, 3, 4)
-        print(c.shape) #(2,3,4,5,6,4,3)
+    >>> print(b.shape)
+    (5, 6, 4, 3)
+    >>> print(c.shape)
+    (2, 3, 4, 5, 6, 4, 3)
    See the documentation of numpy.tensordot for more examples.
    """
    a, b = as_tensor_variable(a), as_tensor_variable(b)
@@ -5275,6 +5477,7 @@ def outer(x, y):
    """Return vector-vector outer product.
    If an input isn't a vector, we flatten it first.
    """
    if x.ndim != 1:
        x = x.flatten()
@@ -5310,9 +5513,15 @@ del x
 class Diagonal(Op):
    """Return specified diagonals.
-    :param x: A tensor variable with x.ndim >= 2.
+    Parameters
+    ----------
+    x
+        A tensor variable with x.ndim >= 2.
+    Returns
+    -------
+        A vector representing the diagonal elements.
-    :return: A vector representing the diagonal elements.
    """
    __props__ = ("offset", "axis1", "axis2")
@@ -5402,6 +5611,8 @@ def stacklists(arg):
    This function can create a tensor from a shaped list of scalars:
+    Examples
+    --------
    >>> from theano.tensor import stacklists, scalars, matrices
    >>> from theano import function
    >>> a, b, c, d = scalars('abcd')
@@ -5421,6 +5632,7 @@ def stacklists(arg):
    >>> x = ones((4, 4), 'float32')
    >>> f(x, x, x, x).shape
    (2, 2, 4, 4)
    """
    if isinstance(arg, (tuple, list)):
        return stack(*list(map(stacklists, arg)))
@@ -5434,12 +5646,17 @@ def ptp(a, axis=None):
    The name of the function comes from the acronym for peak to peak.
-    :param a : Input tensor.
+    Parameters
+    ----------
+    a
+        Input tensor.
+    axis
+        Axis along which to find the peaks. By default, flatten the array.
-    :param axis : Axis along which to find the peaks. By default,
+    Returns
-                flatten the array.
+    -------
+        A new array holding the result.
-    :return : A new array holding the result.
    """
    a = as_tensor_variable(a)
@@ -5495,28 +5712,36 @@ def choose(a, choices, out=None, mode='raise'):
      negative integers are mapped to 0; values greater than n-1 are mapped
      to n-1; and then the new array is constructed as above.
-    :Parameter: *a* - int array
+    Parameters
+    ----------
+    a : int array
        This array must contain integers in [0, n-1], where n is the number of
        choices, unless mode=wrap or mode=clip, in which cases any integers
        are permissible.
-    :Parameter: *choices* - sequence of arrays
+    choices : sequence of arrays
        Choice arrays. a and all of the choices must be broadcastable to
        the same shape. If choices is itself an array (not recommended),
        then its outermost dimension (i.e., the one corresponding to
        choices.shape[0]) is taken as defining the ``sequence``.
-    :Parameter: *out* - array, optional
+    out : array, optional
        If provided, the result will be inserted into this array.
        It should be of the appropriate shape and dtype.
-    :Parameter: *mode* - {``raise`` (default), ``wrap``, ``clip``}, optional
+    mode : {``raise`` (default), ``wrap``, ``clip``}, optional
        Specifies how indices outside [0, n-1] will be treated:
        ``raise`` : an exception is raised
        ``wrap`` : value becomes value mod n
        ``clip`` : values < 0 are mapped to 0, values > n-1 are mapped to n-1
-    :Returns: merged_array - array
+    Returns
+    -------
+    merged_array : array
        The merged result.
-    :Raises:
-        ValueError - shape mismatch
+    Raises
+    ------
+    ValueError - shape mismatch
        If a and each choice array are not all broadcastable to the same shape.
    """
    # This is done to keep the same function signature as NumPy.
    assert out is None
@@ -5609,6 +5834,7 @@ class Choose(Op):
 class AllocEmpty(gof.Op):
    """Implement Alloc on the cpu, but without initializing memory."""
    __props__ = ("dtype",)
    # specify the type of the data