Commit 6304a061 authored by abergeron

Merge pull request #3293 from harlouci/numpydoc_tensor

Numpydoc tensor
Diff collapsed.
......@@ -17,10 +17,12 @@ There are four kinds of BLAS Ops in Theano:
- C-based (blas_c)
- CUDA-based (theano.sandbox.cuda.blas)
:note: Unfortunately (because it's confusing) this file currently contains Ops
that contain both Python and C versions. I think it would be better to
move the C implementations to blas_c so that this file is pure Python.
-JB
Notes
-----
Unfortunately (because it's confusing) this file currently contains Ops
that contain both Python and C versions. I think it would be better to
move the C implementations to blas_c so that this file is pure Python.
-JB
Ops
......@@ -121,7 +123,6 @@ Specialize Gemm to Gemv
If arguments to GEMM are dimshuffled vectors, then we can use GEMV
instead. This optimization is `local_gemm_to_gemv`.
"""
from __future__ import print_function
import copy
......@@ -359,7 +360,9 @@ class Gemv(Op):
x, y are vectors
alpha, beta are scalars
output is a vector that can be inplace on y
"""
__props__ = ("inplace",)
def __init__(self, inplace):
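For reference, the update Gemv encapsulates, sketched with NumPy (names are illustrative; the Op itself dispatches to BLAS):

```python
import numpy as np

# z <- alpha * dot(A, x) + beta * y, computed in place on y when inplace=True
alpha, beta = 2.0, 0.5
A = np.arange(6.0).reshape(3, 2)
x = np.array([1.0, -1.0])
y = np.ones(3)
z = alpha * A.dot(x) + beta * y
```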
......@@ -443,12 +446,13 @@ class Ger(Op):
for matrix A, scalar alpha, vectors x and y.
This interface to GER allows non-destructive operation on A via the
`destructive`
argument to the constructor.
`destructive` argument to the constructor.
:TODO: Create better classes ScipyGer and CGer that inherit from this class
and override the make_thunk() method to use Scipy and C respectively.
"""
__props__ = ("destructive",)
def __init__(self, destructive):
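The rank-1 update GER performs, as a NumPy sketch (illustrative only; the real Op calls the BLAS routine):

```python
import numpy as np

# A <- alpha * outer(x, y) + A; written into A itself when destructive=True
alpha = 0.1
x = np.array([1.0, 2.0, 3.0])
y = np.array([4.0, 5.0])
A = np.zeros((3, 2))
A_out = alpha * np.outer(x, y) + A
```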
......@@ -508,16 +512,22 @@ def ldflags(libs=True, flags=False, libs_dir=False, include_dir=False):
It returns a list of libraries against which an Op's object file
should be linked to benefit from a BLAS implementation.
:type libs: bool, defaults to True
:param libs: extract flags starting with "-l"
:type libs_dir: bool, defaults to False
:param libs_dir: extract flags starting with "-L"
:type include_dir: bool, defaults to False
:param include_dir: extract flags starting with "-I"
:type flags: bool, defaults to False
:param flags: extract all the other flags
:rtype: list of strings
:returns: extracted flags
Parameters
----------
libs : bool, optional
Extract flags starting with "-l" (the default is True).
libs_dir : bool, optional
Extract flags starting with "-L" (the default is False).
include_dir : bool, optional
Extract flags starting with "-I" (the default is False).
flags : bool, optional
Extract all the other flags (the default is False).
Returns
-------
list of strings
Extracted flags.
"""
ldflags_str = theano.config.blas.ldflags
return _ldflags(ldflags_str=ldflags_str,
......@@ -533,19 +543,25 @@ def _ldflags(ldflags_str, libs, flags, libs_dir, include_dir):
Depending on the options, different types of flags will be kept.
:type ldflags_str: string
:param ldflags_str: the string to process. Typically, this will
be the content of `theano.config.blas.ldflags`
:type libs: bool
:param libs: extract flags starting with "-l"
:type libs_dir: bool
:param libs_dir: extract flags starting with "-L"
:type include_dir: bool
:param include_dir: extract flags starting with "-I"
:type flags: bool
:param flags: extract all the other flags
:rtype: list of strings
:returns: extracted flags
Parameters
----------
ldflags_str : string
The string to process. Typically, this will be the content of
`theano.config.blas.ldflags`.
libs : bool
Extract flags starting with "-l".
flags : bool
Extract all the other flags.
libs_dir : bool
Extract flags starting with "-L".
include_dir : bool
Extract flags starting with "-I".
Returns
-------
list of strings
Extracted flags.
"""
rval = []
if libs_dir:
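A minimal sketch of the extraction behaviour documented above (a hypothetical re-implementation for illustration, not Theano's actual `_ldflags`):

```python
def parse_ldflags(ldflags_str, libs=True, flags=False,
                  libs_dir=False, include_dir=False):
    # Hypothetical helper mirroring the documented flag filtering.
    rval = []
    for token in ldflags_str.split():
        if token.startswith('-l'):       # library names
            if libs:
                rval.append(token[2:])
        elif token.startswith('-L'):     # library search paths
            if libs_dir:
                rval.append(token[2:])
        elif token.startswith('-I'):     # include paths
            if include_dir:
                rval.append(token[2:])
        elif flags:                      # everything else
            rval.append(token)
    return rval

parse_ldflags("-L/opt/blas/lib -lopenblas -fopenmp")  # -> ['openblas']
```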
......@@ -598,10 +614,12 @@ def _ldflags(ldflags_str, libs, flags, libs_dir, include_dir):
class GemmRelated(Op):
"""Base class for Gemm and Dot22
"""Base class for Gemm and Dot22.
This class provides a kind of templated gemm Op.
"""
__props__ = ()
def c_support_code(self):
......@@ -915,7 +933,7 @@ class GemmRelated(Op):
class Gemm(GemmRelated):
"""In-place version of matrix-matrix multiplication (with accumulation):
"""In-place version of matrix-matrix multiplication (with accumulation).
When a and b are scalars and x, y, and z are matrices, then
......@@ -936,6 +954,7 @@ class Gemm(GemmRelated):
optimized linear algebra operations.)
"""
E_rank = 'gemm only works for rank 2'
E_scalar = 'gemm requires scalar argument'
E_z_uniq = 'argument z aliased to x or y' # TODO: justify / delete this
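The accumulation Gemm performs, as a NumPy sketch (illustrative; the Op generates C code instead):

```python
import numpy as np

# z <- b * z + a * dot(x, y), computed in place on z when inplace=True
a, b = 1.0, 0.5
x = np.random.rand(3, 4)
y = np.random.rand(4, 2)
z = np.zeros((3, 2))
z = b * z + a * x.dot(y)
```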
......@@ -1430,9 +1449,10 @@ def _factor_canonicalized(lst):
def _gemm_from_factored_list(lst):
"""Returns None, or a list to replace node.outputs
"""
Returns None, or a list to replace node.outputs.
"""
lst2 = []
# Remove the tuple that can't be cast correctly.
# This can happen when we try to cast a complex to a real
......@@ -1524,7 +1544,7 @@ def _gemm_from_node2(node):
class GemmOptimizer(Optimizer):
"""Graph optimizer for inserting Gemm operations"""
"""Graph optimizer for inserting Gemm operations."""
def __init__(self):
Optimizer.__init__(self)
self.warned = False
......@@ -1645,8 +1665,11 @@ class GemmOptimizer(Optimizer):
class Dot22(GemmRelated):
"""Compute a matrix-matrix product.
This is a specialization of the more general Dot()
This is a specialization of the more general Dot().
"""
def make_node(self, x, y):
dtypes = ('float32', 'float64', 'complex64', 'complex128')
if x.type.ndim != 2 or x.type.dtype not in dtypes:
......@@ -1780,8 +1803,7 @@ def local_inplace_ger(node):
@local_optimizer([gemm_no_inplace])
def local_gemm_to_gemv(node):
"""GEMM acting on row or column matrices -> GEMV
"""
"""GEMM acting on row or column matrices -> GEMV."""
if node.op == gemm_no_inplace:
z, a, x, y, b = node.inputs
if z.broadcastable == x.broadcastable == (True, False):
......@@ -1794,8 +1816,7 @@ def local_gemm_to_gemv(node):
@local_optimizer([gemm_no_inplace])
def local_gemm_to_ger(node):
"""GEMM computing an outer-product -> GER
"""
"""GEMM computing an outer-product -> GER."""
if node.op == gemm_no_inplace:
z, a, x, y, b = node.inputs
if x.broadcastable[1] and y.broadcastable[0]:
......@@ -1825,8 +1846,7 @@ def local_gemm_to_ger(node):
# working
@local_optimizer([_dot22])
def local_dot22_to_ger_or_gemv(node):
"""dot22 computing an outer-product -> GER
"""
"""dot22 computing an outer-product -> GER."""
if node.op == _dot22:
x, y = node.inputs
xb = x.broadcastable
......@@ -1904,11 +1924,14 @@ optdb.register('InplaceBlasOpt',
class Dot22Scalar(GemmRelated):
"""Compute a matrix-matrix product.
This is a specialization of the more general Dot()
Used to call the optimized gemm implementation.
Also used to generate a gemm later.
compute scalar*dot(x,y)
Computes scalar*dot(x, y).
"""
def make_node(self, x, y, a):
if a.ndim != 0:
raise TypeError(Gemm.E_scalar, a)
......@@ -1996,25 +2019,27 @@ _dot22scalar = Dot22Scalar()
@local_optimizer([T.mul])
def local_dot22_to_dot22scalar(node):
"""
:note: Previous attempts to alter this optimization to replace dot22 with
gemm instead of dot22scalar resulted in some Scan nodes being
duplicated and the ScanSaveMem optimization never running on them,
resulting in highly increased memory usage. Until this issue is
resolved, this optimization should keep using dot22scalar instead of
gemm.
:note: we upcast the scalar if after the multiplication with the
dot this give the same type.
.. note: We execute this optimizer after the gemm optimizer. This
allow to give more priority to gemm that give more speed up
then this optimizer, but allow the gemm optimizer to ignore
this op.
Notes
-----
Previous attempts to alter this optimization to replace dot22 with
gemm instead of dot22scalar resulted in some Scan nodes being
duplicated and the ScanSaveMem optimization never running on them,
resulting in highly increased memory usage. Until this issue is
resolved, this optimization should keep using dot22scalar instead of
gemm.
We upcast the scalar if, after the multiplication with the dot, this
gives the same type.
We execute this optimizer after the gemm optimizer. This gives
priority to gemm, which provides a larger speedup than this
optimizer, while still allowing the gemm optimizer to ignore this op.
TODO: support the case where we can reorder the mul to generate a
dot22scalar, or fix the canonizer to merge them (one mul with
multiple inputs).
"""
if node.op != T.mul:
return False
......@@ -2102,7 +2127,6 @@ def local_dot22_to_dot22scalar(node):
return [T.mul(_dot22scalar(d.owner.inputs[0],
d.owner.inputs[1], a), *o)]
# must happen after gemm as the gemm optimizer doesn't understand
# dot22scalar, and gemm gives more speedup than dot22scalar
blas_optdb.register('local_dot22_to_dot22scalar',
......
""" Header text for the C and Fortran BLAS interfaces.
There is no standard name or location for this header, so we just insert it
ourselves into the C code
ourselves into the C code.
"""
import logging
import textwrap
......@@ -32,6 +33,7 @@ def detect_macos_sdot_bug():
detected. Its value is returned by the function
- detect_macos_sdot_bug.fix_works will be set to True if the fix was
attempted, and succeeded.
"""
_logger.debug('Starting detection of bug in Mac OS BLAS sdot_ routine')
if detect_macos_sdot_bug.tested:
......
Diff collapsed.
......@@ -5,8 +5,8 @@ import theano
def make_declare(loop_orders, dtypes, sub):
"""
Produce code to declare all necessary variables.
"""
"""
decl = ""
for i, (loop_order, dtype) in enumerate(zip(loop_orders, dtypes)):
var = sub['lv%i' % i] # input name corresponding to ith loop variable
......@@ -117,8 +117,11 @@ def make_checks(loop_orders, dtypes, sub):
def make_alloc(loop_orders, dtype, sub, fortran='0'):
"""Generate C code to allocate outputs.
:param fortran: a string included in the generated code. If it
evaludate to non-zero, an ndarray in fortran order will be
Parameters
----------
fortran : str
A string included in the generated code. If it evaluates to
non-zero, an ndarray in Fortran order will be created; otherwise it
will be in C order.
"""
......@@ -179,25 +182,24 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub, openmp=None):
Make a nested loop over several arrays and associate specific code
to each level of nesting.
@type loop_orders: list of N tuples of length M.
@param loop_orders: Each value of each
tuple can be either the index of a dimension to loop over or
the letter 'x' which means there is no looping to be done
over that variable at that point (in other words we broadcast
over that dimension). If an entry is an integer, it will become
an alias of the entry of that rank.
@type loop_tasks: list of M+1 pieces of code.
@param loop_tasks: The ith loop_task is a pair of strings, the first
string is code to be executed before the ith loop starts, the second
one contains code to be executed just before going to the next element
of the ith dimension.
The last element if loop_tasks is a single string, containing code
to be executed at the very end.
@type sub: a dictionary.
@param sub: Maps 'lv#' to a suitable variable name.
The 'lvi' variable corresponds to the ith element of loop_orders.
Parameters
----------
loop_orders : list of N tuples of length M
Each value of each tuple can be either the index of a dimension to
loop over or the letter 'x' which means there is no looping to be done
over that variable at that point (in other words we broadcast
over that dimension). If an entry is an integer, it will become
an alias of the entry of that rank.
loop_tasks : list of M+1 pieces of code
The ith loop_task is a pair of strings: the first
string is code to be executed before the ith loop starts; the second
one contains code to be executed just before going to the next element
of the ith dimension.
The last element of loop_tasks is a single string, containing code
to be executed at the very end.
sub : dictionary
Maps 'lv#' to a suitable variable name.
The 'lvi' variable corresponds to the ith element of loop_orders.
"""
def loop_over(preloop, code, indices, i):
......@@ -244,8 +246,9 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub, openmp=None):
return "{%s}" % s
def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub, openmp=None):
'''A bit like make_loop, but when only the inner-most loop executes code.
def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub,
openmp=None):
"""A bit like make_loop, but when only the inner-most loop executes code.
All the loops will be reordered so that the loops over the output tensor
are executed with memory access as contiguous as possible.
......@@ -253,7 +256,8 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub, op
will be on its rows; if it's f_contiguous, it will be on its columns.
The output tensor's index among the loop variables is indicated by olv_index.
'''
"""
# Number of variables
nvars = len(init_loop_orders)
......@@ -338,6 +342,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub, op
Returns a list containing a C expression representing the
stride for each dimension of the ith variable, in the
specified loop_order.
"""
var = sub["lv%i" % i]
r = []
......@@ -463,25 +468,25 @@ def make_loop_careduce(loop_orders, dtypes, loop_tasks, sub):
Make a nested loop over several arrays and associate specific code
to each level of nesting.
@type loop_orders: list of N tuples of length M.
@param loop_orders: Each value of each
tuple can be either the index of a dimension to loop over or
the letter 'x' which means there is no looping to be done
over that variable at that point (in other words we broadcast
over that dimension). If an entry is an integer, it will become
an alias of the entry of that rank.
@type loop_tasks: list of M+1 pieces of code.
@param loop_tasks: The ith loop_task is a pair of strings, the first
string is code to be executed before the ith loop starts, the second
one contains code to be executed just before going to the next element
of the ith dimension.
The last element if loop_tasks is a single string, containing code
to be executed at the very end.
@type sub: a dictionary.
@param sub: Maps 'lv#' to a suitable variable name.
The 'lvi' variable corresponds to the ith element of loop_orders.
Parameters
----------
loop_orders : list of N tuples of length M
Each value of each tuple can be either the index of a dimension to
loop over or the letter 'x' which means there is no looping to be done
over that variable at that point (in other words we broadcast
over that dimension). If an entry is an integer, it will become
an alias of the entry of that rank.
loop_tasks : list of M+1 pieces of code
The ith loop_task is a pair of strings: the first
string is code to be executed before the ith loop starts; the second
one contains code to be executed just before going to the next element
of the ith dimension.
The last element of loop_tasks is a single string, containing code
to be executed at the very end.
sub : dictionary
Maps 'lv#' to a suitable variable name.
The 'lvi' variable corresponds to the ith element of loop_orders.
"""
def loop_over(preloop, code, indices, i):
......
Diff collapsed.
......@@ -11,13 +11,18 @@ import theano
class LoadFromDisk(Op):
"""
An operation to load an array from disk
An operation to load an array from disk.
See Also
load
--------
load
Notes
-----
Non-differentiable.
@note: Non-differentiable.
"""
__props__ = ("dtype", "broadcastable", "mmap_mode")
def __init__(self, dtype, broadcastable, mmap_mode=None):
......@@ -53,18 +58,26 @@ def load(path, dtype, broadcastable, mmap_mode=None):
"""
Load an array from an .npy file.
:param path: A Generic symbolic variable, that will contain a string
:param dtype: The data type of the array to be read.
:param broadcastable: The broadcastable pattern of the loaded array,
for instance, (False,) for a vector, (False, True) for a column,
(False, False) for a matrix.
:param mmap_mode: How the file will be loaded. None means that the
data will be copied into an array in memory, 'c' means that the file
will be mapped into virtual memory, so only the parts that are
needed will be actually read from disk and put into memory.
Other modes supported by numpy.load ('r', 'r+', 'w+') cannot
be supported by Theano.
Parameters
----------
path
A Generic symbolic variable that will contain a string.
dtype : data-type
The data type of the array to be read.
broadcastable
The broadcastable pattern of the loaded array, for instance,
(False,) for a vector, (False, True) for a column,
(False, False) for a matrix.
mmap_mode
How the file will be loaded. None means that the
data will be copied into an array in memory, 'c' means that the file
will be mapped into virtual memory, so only the parts that are
needed will be actually read from disk and put into memory.
Other modes supported by numpy.load ('r', 'r+', 'w+') cannot
be supported by Theano.
Examples
--------
>>> from theano import *
>>> path = Variable(Generic())
>>> x = tensor.load(path, 'int64', (False,))
......@@ -72,6 +85,7 @@ def load(path, dtype, broadcastable, mmap_mode=None):
>>> fn = function([path], y)
>>> fn("stored-array.npy")
array([0, 2, 4, 6, 8], dtype=int64)
"""
return LoadFromDisk(dtype, broadcastable, mmap_mode)(path)
......@@ -91,14 +105,19 @@ else:
class MPIRecv(Op):
"""
An operation to asynchronously receive an array to a remote host using MPI
An operation to asynchronously receive an array from a remote host using MPI.
See Also
MPIRecv
MPIWait
--------
MPISend
MPIRecvWait
Notes
-----
Non-differentiable.
@note: Non-differentiable.
"""
__props__ = ("source", "tag", "shape", "dtype")
def __init__(self, source, tag, shape, dtype):
......@@ -134,13 +153,18 @@ class MPIRecv(Op):
class MPIRecvWait(Op):
"""
An operation to wait on a previously received array using MPI
An operation to wait on a previously received array using MPI.
See Also
MPIRecv
--------
MPIRecv
Notes
-----
Non-differentiable.
@note: Non-differentiable.
"""
__props__ = ("tag",)
def __init__(self, tag):
......@@ -168,14 +192,19 @@ class MPIRecvWait(Op):
class MPISend(Op):
"""
An operation to asynchronously Send an array to a remote host using MPI
An operation to asynchronously send an array to a remote host using MPI.
See Also
MPIRecv
MPISendWait
--------
MPIRecv
MPISendWait
Notes
-----
Non-differentiable.
@note: Non-differentiable.
"""
__props__ = ("dest", "tag")
def __init__(self, dest, tag):
......@@ -202,12 +231,16 @@ class MPISend(Op):
class MPISendWait(Op):
"""
An operation to wait on a previously sent array using MPI
An operation to wait on a previously sent array using MPI.
See Also
--------
MPISend
See Also:
MPISend
Notes
-----
Non-differentiable.
@note: Non-differentiable.
"""
__props__ = ("tag",)
......@@ -227,35 +260,35 @@ class MPISendWait(Op):
def isend(var, dest, tag):
"""
Non blocking send
Non-blocking send.
"""
return MPISend(dest, tag)(var)
def send(var, dest, tag):
"""
blocking send
Blocking send.
"""
return MPISendWait(tag)(*isend(var, dest, tag))
def irecv(shape, dtype, source, tag):
"""
non-blocking receive
Non-blocking receive.
"""
return MPIRecv(source, tag, shape, dtype)()
def recv(shape, dtype, source, tag):
"""
blocking receive
Blocking receive.
"""
return MPIRecvWait(tag)(*irecv(shape, dtype, source, tag))
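A hedged sketch of how these wrappers compose into a two-rank graph, assuming this module is importable as `theano.tensor.io` and a working MPI launcher (e.g. `mpiexec -n 2`):

```python
from theano import tensor
from theano.tensor.io import send, recv

# Rank 0: build a graph that sends x to rank 1 with tag 7.
x = tensor.matrix('x')
sent = send(x, dest=1, tag=7)

# Rank 1: build a graph that receives a (2, 2) float64 matrix from rank 0.
received = recv((2, 2), 'float64', source=0, tag=7)
```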
# Ordering keys for scheduling
def mpi_send_wait_key(a):
""" Wait as long as possible on Waits, Start Send/Recvs early """
"""Wait as long as possible on Waits, Start Send/Recvs early."""
if isinstance(a.op, (MPIRecvWait, MPISendWait)):
return 1
if isinstance(a.op, (MPIRecv, MPISend)):
......@@ -264,7 +297,7 @@ def mpi_send_wait_key(a):
def mpi_tag_key(a):
""" Break MPI ties by using the variable tag - prefer lower tags first """
"""Break MPI ties by using the variable tag - prefer lower tags first."""
if isinstance(a.op, (MPISend, MPIRecv, MPIRecvWait, MPISendWait)):
return a.op.tag
else:
......
......@@ -17,17 +17,18 @@ logger = logging.getLogger(__name__)
class MatrixPinv(Op):
"""Computes the pseudo-inverse of a matrix :math:`A`.
The pseudo-inverse of a matrix A, denoted :math:`A^+`, is
The pseudo-inverse of a matrix :math:`A`, denoted :math:`A^+`, is
defined as: "the matrix that 'solves' [the least-squares problem]
:math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then
:math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`.
Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity matrix.
This method is not faster then `matrix_inverse`. Its strength comes from
This method is not faster than `matrix_inverse`. Its strength comes from
the fact that it works for non-square matrices.
If you have a square matrix though, `matrix_inverse` can be both more
exact and faster to compute. Also this op does not get optimized into a
solve op.
"""
__props__ = ()
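The least-squares characterization above can be checked with plain NumPy (a sketch, not this Op's code path):

```python
import numpy as np

A = np.random.rand(5, 3)             # non-square, so matrix_inverse cannot apply
b = np.random.rand(5)
x_bar = np.linalg.pinv(A).dot(b)     # A^+ b
x_ls = np.linalg.lstsq(A, b, rcond=None)[0]
assert np.allclose(x_bar, x_ls)      # both solve the least-squares problem
```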
......@@ -55,8 +56,11 @@ class MatrixInverse(Op):
matrix :math:`A_{inv}` such that the dot product :math:`A \cdot A_{inv}`
and :math:`A_{inv} \cdot A` equals the identity matrix :math:`I`.
:note: When possible, the call to this op will be optimized to the call
of ``solve``.
Notes
-----
When possible, the call to this op will be optimized to the call
of ``solve``.
"""
__props__ = ()
......@@ -82,7 +86,7 @@ class MatrixInverse(Op):
where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
``inputs``. Using the `matrix cookbook
<http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
once can deduce that the relation corresponds to
one can deduce that the relation corresponds to
.. math:: (X^{-1} \cdot V^{T} \cdot X^{-1})^T.
......@@ -99,9 +103,9 @@ class MatrixInverse(Op):
.. math:: \frac{\partial X^{-1}}{\partial X}V,
where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
``inputs``. Using the `matrix cookbook
``inputs``. Using the `matrix cookbook
<http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
once can deduce that the relation corresponds to
one can deduce that the relation corresponds to
.. math:: X^{-1} \cdot V \cdot X^{-1}.
......@@ -120,11 +124,12 @@ matrix_inverse = MatrixInverse()
def matrix_dot(*args):
""" Shorthand for product between several dots
""" Shorthand for product between several dots.
Given :math:`N` matrices :math:`A_0, A_1, .., A_N`, ``matrix_dot`` will
generate the matrix product between all in the given order, namely
:math:`A_0 \cdot A_1 \cdot A_2 \cdot .. \cdot A_N`.
"""
rval = args[0]
for a in args[1:]:
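Concretely, the loop above chains pairwise dots; a NumPy analogue (illustrative):

```python
import numpy as np

A0, A1, A2 = np.eye(2), np.arange(4.0).reshape(2, 2), np.ones((2, 2))
out = A0.dot(A1).dot(A2)   # matrix_dot(A0, A1, A2) builds the same chain symbolically
```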
......@@ -163,10 +168,14 @@ alloc_diag = AllocDiag()
class ExtractDiag(Op):
""" Return the diagonal of a matrix.
"""Return the diagonal of a matrix.
Notes
-----
Works on the GPU.
:note: work on the GPU.
"""
__props__ = ("view",)
def __init__(self, view=False):
......@@ -246,14 +255,18 @@ def trace(X):
"""
Returns the sum of diagonal elements of matrix X.
:note: work on GPU since 0.6rc4.
Notes
-----
Works on GPU since 0.6rc4.
"""
return extract_diag(X).sum()
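The identity this one-liner relies on, checked with NumPy (sketch):

```python
import numpy as np

X = np.arange(9.0).reshape(3, 3)
assert X.trace() == np.diag(X).sum()   # trace == sum of diagonal elements
```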
class Det(Op):
"""Matrix determinant
Input should be a square matrix
"""
Matrix determinant. Input should be a square matrix.
"""
__props__ = ()
......@@ -287,9 +300,11 @@ det = Det()
class Eig(Op):
"""Compute the eigenvalues and right eigenvectors of a square array.
"""
Compute the eigenvalues and right eigenvectors of a square array.
"""
_numop = staticmethod(numpy.linalg.eig)
__props__ = ()
......@@ -317,6 +332,7 @@ class Eigh(Eig):
Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.
"""
_numop = staticmethod(numpy.linalg.eigh)
__props__ = ('UPLO',)
......@@ -363,6 +379,7 @@ class Eigh(Eig):
.. math:: \frac{\partial\,v_{kn}}
{\partial a_{ij}} =
\sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
"""
x, = inputs
w, v = self(x)
......@@ -383,9 +400,11 @@ def _zero_disconnected(outputs, grads):
class EighGrad(Op):
"""Gradient of an eigensystem of a Hermitian matrix.
"""
Gradient of an eigensystem of a Hermitian matrix.
"""
__props__ = ('UPLO',)
def __init__(self, UPLO='L'):
......@@ -414,6 +433,7 @@ class EighGrad(Op):
"""
Implements the "reverse-mode" gradient for the eigensystem of
a square matrix.
"""
x, w, v, W, V = inputs
N = x.shape[0]
......@@ -453,10 +473,13 @@ def eigh(a, UPLO='L'):
class QRFull(Op):
"""
Full QR Decomposition.
Computes the QR decomposition of a matrix.
Factor the matrix a as qr, where q is orthonormal
and r is upper-triangular.
"""
_numop = staticmethod(numpy.linalg.qr)
__props__ = ('mode',)
......@@ -484,9 +507,12 @@ class QRFull(Op):
class QRIncomplete(Op):
"""
Incomplete QR Decomposition.
Computes the QR decomposition of a matrix.
Factor the matrix a as qr and return a single matrix.
"""
_numop = staticmethod(numpy.linalg.qr)
__props__ = ('mode',)
......@@ -513,15 +539,12 @@ def qr(a, mode="full"):
Factor the matrix a as qr, where q
is orthonormal and r is upper-triangular.
:type a:
array_like, shape (M, N)
:param a:
Parameters
----------
a : array_like, shape (M, N)
Matrix to be factored.
:type mode:
one of 'reduced', 'complete', 'r', 'raw', 'full' and
'economic', optional
:keyword mode:
mode : {'reduced', 'complete', 'r', 'raw', 'full', 'economic'}, optional
If K = min(M, N), then
'reduced'
......@@ -558,19 +581,18 @@ def qr(a, mode="full"):
both doing the same thing in new numpy versions, but only
'full' works on older numpy versions.
:rtype q:
matrix of float or complex, optional
:return q:
A matrix with orthonormal columns. When mode = 'complete' the
result is an orthogonal/unitary matrix depending on whether or
not a is real/complex. The determinant may be either +/- 1 in
that case.
:rtype r:
matrix of float or complex, optional
:return r:
The upper-triangular matrix.
Returns
-------
q : matrix of float or complex, optional
A matrix with orthonormal columns. When mode = 'complete' the
result is an orthogonal/unitary matrix depending on whether or
not a is real/complex. The determinant may be either +/- 1 in
that case.
r : matrix of float or complex, optional
The upper-triangular matrix.
"""
x = [[2, 1], [3, 4]]
if isinstance(numpy.linalg.qr(x, mode), tuple):
return QRFull(mode)(a)
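A hypothetical usage sketch, assuming this function is importable as `theano.tensor.nlinalg.qr`:

```python
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nlinalg import qr

a = T.dmatrix('a')
q, r = qr(a, mode='reduced')          # two symbolic outputs for a full QR
f = theano.function([a], [q, r])
q_val, r_val = f(np.random.rand(4, 3))
```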
......@@ -579,22 +601,26 @@ def qr(a, mode="full"):
class SVD(Op):
"""
Parameters
----------
full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N),
respectively.
Otherwise, the shapes are (M, K) and (K, N), respectively,
where K = min(M, N).
compute_uv : bool, optional
Whether or not to compute u and v in addition to s.
True by default.
"""
# See doc in the docstring of the function just after this class.
_numop = staticmethod(numpy.linalg.svd)
__props__ = ('full_matrices', 'compute_uv')
def __init__(self, full_matrices=True, compute_uv=True):
"""
full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N),
respectively.
Otherwise, the shapes are (M, K) and (K, N), respectively,
where K = min(M, N).
compute_uv : bool, optional
Whether or not to compute u and v in addition to s.
True by default.
"""
self.full_matrices = full_matrices
self.compute_uv = compute_uv
......@@ -619,18 +645,21 @@ def svd(a, full_matrices=1, compute_uv=1):
"""
This function performs the SVD on CPU.
:type full_matrices: bool, optional
:param full_matrices:
Parameters
----------
full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N),
respectively.
Otherwise, the shapes are (M, K) and (K, N), respectively,
where K = min(M, N).
:type compute_uv: bool, optional
:param compute_uv:
compute_uv : bool, optional
Whether or not to compute u and v in addition to s.
True by default.
:returns: U, V and D matrices.
Returns
-------
U, V, D : matrices
"""
return SVD(full_matrices, compute_uv)(a)
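A hypothetical usage sketch, assuming `theano.tensor.nlinalg.svd` and the NumPy (u, s, v) output order:

```python
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nlinalg import svd

a = T.dmatrix('a')
u, s, v = svd(a)                      # full_matrices=1, compute_uv=1 by default
f = theano.function([a], [u, s, v])
u_val, s_val, v_val = f(np.random.rand(4, 3))
```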
......
......@@ -44,8 +44,13 @@ from theano.gradient import grad_undefined
# the output function is only defined when dr, dc, dt are natural numbers.
class Conv3D(theano.Op):
""" 3D `convolution` of multiple filters on a minibatch
:note: does not flip the kernel, moves kernel with a user specified stride
"""
3D `convolution` of multiple filters on a minibatch.
Notes
-----
Does not flip the kernel, moves kernel with a user specified stride.
"""
__props__ = ()
......@@ -54,10 +59,17 @@ class Conv3D(theano.Op):
def make_node(self, V, W, b, d):
"""
:param V: Visible unit, input(batch,row,column,time,in channel)
:param W: Weights, filter(out channel,row,column,time,in channel)
:param b: bias, shape == (W.shape[0],)
:param d: strides when moving the filter over the input(dx,dy,dt)
Parameters
----------
V
Visible unit, input (batch, row, column, time, in channel).
W
Weights, filter (out channel, row, column, time, in channel).
b
Bias, shape == (W.shape[0],).
d
Strides when moving the filter over the input (dx, dy, dt).
"""
V_ = T.as_tensor_variable(V)
......@@ -539,28 +551,39 @@ _conv3D = Conv3D()
def conv3D(V, W, b, d):
"""
3D "convolution" of multiple filters on a minibatch
3D "convolution" of multiple filters on a minibatch.
(does not flip the kernel, moves kernel with a user specified stride)
:param V: Visible unit, input.
dimensions: (batch, row, column, time, in channel)
:param W: Weights, filter.
dimensions: (out channel, row, column, time ,in channel)
:param b: bias, shape == (W.shape[0],)
:param d: strides when moving the filter over the input(dx, dy, dt)
:note: The order of dimensions does not correspond to the one in `conv2d`.
This is for optimization.
:note: The GPU implementation is very slow. You should use
:func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>` or
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>` for a
GPU graph instead.
:see: Someone made a script that shows how to swap the axes
between both 3d convolution implementations in Theano. See
the last `attachment
<https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_.
Parameters
----------
V
Visible unit, input.
Dimensions: (batch, row, column, time, in channel).
W
Weights, filter.
Dimensions: (out channel, row, column, time, in channel).
b
Bias, shape == (W.shape[0],).
d
Strides when moving the filter over the input (dx, dy, dt).
Notes
-----
The order of dimensions does not correspond to the one in `conv2d`.
This is for optimization.
The GPU implementation is very slow. You should use
:func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>` or
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>` for a
GPU graph instead.
See Also
--------
Someone made a script that shows how to swap the axes
between both 3d convolution implementations in Theano. See
the last `attachment <https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_
"""
return _conv3D(V, W, b, d)
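A hedged sketch of the dimension convention above, assuming the module path `theano.tensor.nnet.Conv3D` (shapes and strides are illustrative):

```python
import theano.tensor as T
from theano.tensor.nnet.Conv3D import conv3D

V = T.TensorType('float64', (False,) * 5)('V')  # (batch, row, column, time, in channel)
W = T.TensorType('float64', (False,) * 5)('W')  # (out channel, row, column, time, in channel)
b = T.dvector('b')                              # shape == (W.shape[0],)
H = conv3D(V, W, b, d=(1, 1, 1))                # unit strides (dx, dy, dt)
```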
......
......@@ -13,7 +13,11 @@ from theano.gradient import DisconnectedType
# than visiting each weight gradient element once and passing through whole video
class ConvGrad3D(theano.Op):
""" Gradient of Conv3D with respect to W """
"""
Gradient of Conv3D with respect to W.
"""
__props__ = ()
def c_code_cache_version(self):
......
......@@ -11,7 +11,12 @@ from theano.gradient import DisconnectedType
class ConvTransp3D(theano.Op):
""" "Transpose" of Conv3D (Conv3D implements multiplication by an implicitly defined matrix W. This implements multiplication by its transpose) """
"""
"Transpose" of Conv3D (Conv3D implements multiplication by an implicitly
defined matrix W. This implements multiplication by its transpose).
"""
__props__ = ()
def c_code_cache_version(self):
......@@ -19,10 +24,17 @@ class ConvTransp3D(theano.Op):
def make_node(self, W, b, d, H, RShape=None):
"""
:param W: Weights, filter
:param b: bias, shape == (W.shape[0],)
:param d: strides when moving the filter over the input
:param H: The output of Conv3D
Parameters
----------
W
Weights, filter.
b
Bias, shape == (W.shape[0],).
d
Strides when moving the filter over the input.
H
The output of Conv3D.
"""
W_ = T.as_tensor_variable(W)
b_ = T.as_tensor_variable(b)
......
Diff collapsed.
......@@ -6,10 +6,13 @@ import theano.sandbox.cuda as cuda
def get_diagonal_subtensor_view(x, i0, i1):
"""Helper function for DiagonalSubtensor and
IncDiagonalSubtensor
"""
Helper function for DiagonalSubtensor and IncDiagonalSubtensor.
Notes
-----
It returns a partial view of x, not a partial copy.
:note: it return a partial view of x, not a partial copy.
"""
# We have to cast i0 and i1 to int because python 2.4 (and maybe later)
# does not support indexing with 0-dim, 'int*' ndarrays.
......@@ -27,13 +30,24 @@ def get_diagonal_subtensor_view(x, i0, i1):
class DiagonalSubtensor(Op):
"""Return a form a nd diagonal subtensor.
:param x: n-d tensor
:param i0: axis index in x
:param i1: axis index in x
:note: Work on the GPU.
"""
Return a form of an n-d diagonal subtensor.
Parameters
----------
x
n-d tensor
i0
Axis index in x
i1
Axis index in x
Notes
-----
Works on the GPU.
Extended summary
----------------
``x`` is some n-dimensional tensor, but this Op only deals with a
matrix-shaped slice, using axes i0 and i1. Without loss of
generality, suppose that ``i0`` picks out our ``row`` dimension,
......@@ -73,6 +87,7 @@ class DiagonalSubtensor(Op):
see what's necessary at that point.
"""
__props__ = ("inplace",)
def __str__(self):
......@@ -111,8 +126,10 @@ diagonal_subtensor = DiagonalSubtensor(False)
class IncDiagonalSubtensor(Op):
"""
The gradient of DiagonalSubtensor
The gradient of DiagonalSubtensor.
"""
__props__ = ("inplace",)
def __str__(self):
......@@ -153,26 +170,39 @@ inc_diagonal_subtensor = IncDiagonalSubtensor(False)
def conv3d(signals, filters,
signals_shape=None, filters_shape=None,
border_mode='valid'):
"""Convolve spatio-temporal filters with a movie.
"""
Convolve spatio-temporal filters with a movie.
It flips the filters.
:param signals: timeseries of images whose pixels have color channels.
shape: [Ns, Ts, C, Hs, Ws]
:param filters: spatio-temporal filters
shape: [Nf, Tf, C, Hf, Wf]
:param signals_shape: None or a tuple/list with the shape of signals
:param filters_shape: None or a tuple/list with the shape of filters
:param border_mode: The only one tested is 'valid'.
:note: Another way to define signals: (batch, time, in channel, row, column)
Another way to define filters: (out channel,time,in channel, row, column)
:note: For the GPU, you can use this implementation or
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>`.
:see: Someone made a script that shows how to swap the axes between
both 3d convolution implementations in Theano. See the last
`attachment <https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_.
Parameters
----------
signals
Timeseries of images whose pixels have color channels.
Shape: [Ns, Ts, C, Hs, Ws].
filters
Spatio-temporal filters.
Shape: [Nf, Tf, C, Hf, Wf].
signals_shape
None or a tuple/list with the shape of signals.
filters_shape
None or a tuple/list with the shape of filters.
border_mode
The only one tested is 'valid'.
Notes
-----
Another way to define signals: (batch, time, in channel, row, column)
Another way to define filters: (out channel, time, in channel, row, column)
For the GPU, you can use this implementation or
:func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>`.
See Also
--------
Someone made a script that shows how to swap the axes between
both 3d convolution implementations in Theano. See the last
`attachment <https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_
"""
......@@ -264,7 +294,8 @@ def conv3d(signals, filters,
def make_gpu_optimizer(op, to_gpu):
"""This function create optimizer that move some inputs to the GPU
"""
This function creates an optimizer that moves some inputs to the GPU
for ops that work on both CPU and GPU.
The op object is created by calling op(), so good default value
......@@ -272,8 +303,12 @@ def make_gpu_optimizer(op, to_gpu):
We suppose the same op works with CPU and GPU inputs.
:param op: the op that support GPU inputs
:param to_gpu: a list of op inputs that are moved to the GPU.
Parameters
----------
op
The op that supports GPU inputs.
to_gpu
A list of op inputs that are moved to the GPU.
"""
@theano.gof.local_optimizer([op, cuda.gpu_from_host])
......@@ -281,6 +316,7 @@ def make_gpu_optimizer(op, to_gpu):
"""
op(host_from_gpu()) -> host_from_gpu(op)
gpu_from_host(op) -> op(gpu_from_host)
"""
if isinstance(node.op, op):
# op(host_from_gpu()) -> host_from_gpu(op)
......@@ -314,7 +350,7 @@ if cuda.cuda_available:
@theano.gof.local_optimizer([DiagonalSubtensor, IncDiagonalSubtensor])
def local_inplace_DiagonalSubtensor(node):
""" also work for IncDiagonalSubtensor """
"""Also work for IncDiagonalSubtensor."""
if (isinstance(node.op, (DiagonalSubtensor, IncDiagonalSubtensor)) and
not node.op.inplace):
new_op = node.op.__class__(inplace=True)
......