提交 257d4b5f authored 作者: Iban Harlouchet's avatar Iban Harlouchet

numpydoc for theano/sandbox/cuda/basic_ops.py

上级 a663afde
......@@ -59,6 +59,7 @@ def as_cuda_array(obj):
class HostFromGpu(GpuOp):
"""
Implement the transfer from gpu to the cpu.
"""
check_input = False
......@@ -118,6 +119,7 @@ host_from_gpu = HostFromGpu()
class GpuFromHost(GpuOp):
"""
Implement the transfer from cpu to the gpu.
"""
check_input = False
......@@ -185,6 +187,7 @@ gpu_from_host = GpuFromHost()
class GpuElemwise(GpuOp):
"""
Implement a generic elemwise on the gpu.
"""
nin = property(lambda self: self.scalar_op.nin)
nout = property(lambda self: self.scalar_op.nout)
......@@ -316,6 +319,7 @@ class GpuElemwise(GpuOp):
class GpuDimShuffle(GpuOp):
"""
Implement DimShuffle on the gpu.
"""
check_broadcast = False
......@@ -523,39 +527,47 @@ class GpuDimShuffle(GpuOp):
class GpuCAReduce(GpuOp):
"""GpuCAReduce is a Reduction along some dimensions by a scalar op.
"""
GpuCAReduce is a Reduction along some dimensions by a scalar op.
The dimensions along which to reduce is specified by the
`reduce_mask` that you pass to the constructor. The `reduce_mask`
is a tuple of booleans (actually integers 0 or 1) that specify for
each input dimension, whether to reduce it (1) or not (0).
For example, when scalar_op is a theano.scalar.basic.Add instance:
- reduce_mask == (1,) sums a vector to a scalar
- reduce_mask == (1,0) computes the sum of each column in a matrix
- reduce_mask == (0,1) computes the sum of each row in a matrix
- reduce_mask == (1,1,1) computes the sum of all elements in a 3-tensor.
:note: any reduce_mask of all zeros is a sort of 'copy', and may
be removed during graph optimization
Parameters
----------
pre_scalar_op
If present, must be a scalar op with only 1 input.
We will execute it on the input value before reduction.
Notes
-----
This Op is a work in progress.
This op was recently upgraded from just GpuSum to a general CAReduce. Not
many code cases are supported for scalar_op being anything other than
scal.Add instances yet.
scal.Add instances yet.
Important note: if you implement new cases for this op, be sure to
benchmark them and make sure that they actually result in a speedup.
GPUs are not especially well-suited to reduction operations so it is
quite possible that the GPU might be slower for some cases.
pre_scalar_op: if present, must be a scalar op with only 1
input. We will execute it on the input value before reduction.
Examples
--------
When scalar_op is a theano.scalar.basic.Add instance:
- reduce_mask == (1,) sums a vector to a scalar
- reduce_mask == (1,0) computes the sum of each column in a matrix
- reduce_mask == (0,1) computes the sum of each row in a matrix
- reduce_mask == (1,1,1) computes the sum of all elements in a 3-tensor.
.. note:: Any reduce_mask of all zeros is a sort of 'copy', and may
be removed during graph optimization.
"""
......@@ -620,8 +632,10 @@ class GpuCAReduce(GpuOp):
"""
def supports_c_code(self, inputs):
""" Returns True if the current op and reduce pattern
has functioning C code """
"""
Returns True if the current op and reduce pattern has functioning C code.
"""
# If we don't even have the right method, we certainly
# don't support the C code
......@@ -781,9 +795,10 @@ class GpuCAReduce(GpuOp):
return sio.getvalue()
def _makecall(self, node, name, x, z, fail, pattern=None):
"""Return a string for making a kernel call.
"""
Return a string for making a kernel call.
The return value looks something like:
The return value looks something like:
.. code-block:: c
......@@ -806,6 +821,7 @@ class GpuCAReduce(GpuOp):
PyErr_Format(PyExc_RuntimeError, "Cuda error: ... );
%(fail)s;
}
"""
sio = StringIO()
if pattern is None:
......@@ -874,7 +890,8 @@ class GpuCAReduce(GpuOp):
def _k_decl(self, node, nodename, pattern=None,
ndim=None, reduce_mask=None):
"""Return a string to declare a kernel function
"""
Return a string to declare a kernel function.
The result will look something like this:
......@@ -953,6 +970,7 @@ class GpuCAReduce(GpuOp):
Otherwise, check that the scalar op is maximum or minimum
and return first_item. It should be the first element of the reduction.
As the maximum and minimum of the same value don't change, this works.
"""
if hasattr(self.scalar_op, 'identity'):
return str(self.scalar_op.identity)
......@@ -980,16 +998,26 @@ class GpuCAReduce(GpuOp):
def _assign_reduce(self, node, name, left, right, sub, pre):
"""
node: the node argument to this op's c_code
name: the name argument to this op's c_code
left: a C code string identifying an lvalue
right: a C code string identifying an expression
sub: the sub argument to this op's c_code
pre: If True, we will add the pre_scalar_op.c_code
returns C code to reduce left and right, assigning the
result to left."""
Parameters
----------
node
The node argument to this op's c_code.
name
The name argument to this op's c_code.
left
A C code string identifying an lvalue.
right
A C code string identifying an expression.
sub
The sub argument to this op's c_code.
pre
If True, we will add the pre_scalar_op.c_code.
Returns
-------
C code to reduce left and right, assigning the result to left.
"""
x, = node.inputs
dtype = x.dtype
......@@ -1019,8 +1047,11 @@ class GpuCAReduce(GpuOp):
"""
WRITEME
Parameters
----------
node, name, sub: these should be passed through from the original
call to c_code
"""
# This code (the code in new_version) is currently ignored.
......@@ -1161,6 +1192,7 @@ class GpuCAReduce(GpuOp):
IG: I believe, based on how this is called in c_code, that it
is for the case where we are reducing on all axes and x is
C contiguous.
"""
if getattr(self.scalar_op, 'identity', None) == 0:
zero_shp = "cudaMemset(%(z)s->devdata, 0, CudaNdarray_SIZE(%(z)s) * sizeof(float))" % locals()
......@@ -1243,8 +1275,14 @@ class GpuCAReduce(GpuOp):
def c_code_reduce_01X(self, sio, node, name, x, z, fail, N):
"""
:param N: the number of 1 in the pattern N=1 -> 01, N=2 -> 011 N=3 ->0111
Work for N=1,2,3
Parameters
----------
N : int
The number of 1s in the pattern:
N=1 -> 01, N=2 -> 011, N=3 -> 0111.
Works for N=1,2,3.
"""
assert N in [1, 2, 3]
......@@ -2395,6 +2433,7 @@ class GpuCAReduce(GpuOp):
class GpuReshape(tensor.Reshape, GpuOp):
"""
Implement Reshape on the gpu.
"""
# __hash__, __eq__, __str__ come from tensor.Subtensor
def make_node(self, x, shp):
......@@ -2541,6 +2580,7 @@ class GpuReshape(tensor.Reshape, GpuOp):
class GpuSubtensor(GpuOp, tensor.Subtensor):
"""
Implement subtensor on the gpu.
"""
check_broadcast = False
......@@ -2647,6 +2687,7 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
"""
Implement AdvancedSubtensor1 on the gpu.
"""
# If True or False, we assert that we use the take version or not
# If None, we choose the best one applicable
......@@ -2762,6 +2803,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
"""
Implement AdvancedIncSubtensor1 on the gpu.
"""
def make_node(self, x, y, ilist):
x_ = as_cuda_ndarray_variable(x)
......@@ -2936,13 +2978,17 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
"""Implement AdvancedIncSubtensor1 on the gpu, but use function
"""
Implement AdvancedIncSubtensor1 on the gpu, but use functions
only available on compute capability 2.0 and more recent.
"""
def make_node(self, x, y, ilist):
"""It defer from GpuAdvancedIncSubtensor1 in that it make sure
"""
It differs from GpuAdvancedIncSubtensor1 in that it makes sure
the indices are of type long.
"""
x_ = as_cuda_ndarray_variable(x)
y_ = as_cuda_ndarray_variable(y)
......@@ -3132,11 +3178,14 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
Implement IncSubtensor on the gpu.
Note: The optimization to make this inplace is in tensor/opt.
The same optimization handles IncSubtensor and GpuIncSubtensor.
This Op has c_code too; it inherits tensor.IncSubtensor's c_code.
The helper methods like do_type_checking, copy_of_x, etc. specialize
the c_code for this Op.
Notes
-----
The optimization to make this inplace is in tensor/opt.
The same optimization handles IncSubtensor and GpuIncSubtensor.
This Op has c_code too; it inherits tensor.IncSubtensor's c_code.
The helper methods like do_type_checking, copy_of_x, etc. specialize
the c_code for this Op.
"""
def make_node(self, x, y, *inputs):
......@@ -3146,22 +3195,31 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
return Apply(self, [x, y] + rval.inputs[2:], [x.type()])
def do_type_checking(self, node):
""" Should raise NotImplementedError if c_code does not support
"""
Should raise NotImplementedError if c_code does not support
the types involved in this node.
"""
"""
if not isinstance(node.inputs[0].type, CudaNdarrayType):
raise NotImplementedError()
def copy_of_x(self, x):
"""
:param x: a string giving the name of a C variable
pointing to an array
:return: C code expression to make a copy of x
Parameters
----------
x : str
A string giving the name of a C variable pointing to an array.
Returns
-------
C code expression to make a copy of x.
Notes
-----
Base class uses `PyArrayObject *`, subclasses may override for
different types of arrays.
Base class uses `PyArrayObject *`, subclasses may override for
different types of arrays.
"""
return """(CudaNdarray*) CudaNdarray_Copy(%(x)s)""" % locals()
......@@ -3170,12 +3228,16 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
def make_view_array(self, x, view_ndim):
"""
:param x: a string identifying an array to be viewed
:param view_ndim: a string specifying the number of dimensions
to have in the view
Parameters
----------
x : str
A string identifying an array to be viewed.
view_ndim : str
A string specifying the number of dimensions to have in the view.
This doesn't need to actually set up the view with the
right indexing; we'll do that manually later.
"""
ret = """zview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
if (CudaNdarray_set_device_data(
......@@ -3201,18 +3263,28 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
return ret
def get_helper_c_code_args(self):
""" Return a dictionary of arguments to use with helper_c_code"""
"""
Return a dictionary of arguments to use with helper_c_code.
"""
return {'c_prefix': 'CudaNdarray',
'strides_mul': 4
}
def copy_into(self, view, source):
"""
view: string, C code expression for an array
source: string, C code expression for an array
returns a C code expression to copy source into view, and
return 0 on success
Parameters
----------
view : str
C code expression for an array.
source : str
C code expression for an array.
Returns
-------
A C code expression to copy source into view, and 0 on success.
"""
# On the CPU it unbroadcast based on the run time shapes. We
# need the same behavior on the GPU.
......@@ -3245,6 +3317,7 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
class GpuFlatten(gof.HideC, tensor.Flatten, GpuOp):
"""
Implement Flatten on the gpu.
"""
def make_node(self, x):
assert isinstance(x.type, CudaNdarrayType)
......@@ -3257,6 +3330,7 @@ class GpuFlatten(gof.HideC, tensor.Flatten, GpuOp):
class GpuShape(tensor.Shape, GpuOp):
"""
Implement Shape on the gpu.
"""
def make_node(self, x):
return Apply(self, [x], [tensor.lvector()])
......@@ -3266,6 +3340,7 @@ gpu_shape = GpuShape()
class GpuJoin(tensor.Join, GpuOp):
"""
Implement Join on the gpu.
"""
def make_node(self, *axis_and_tensors):
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
......@@ -3516,7 +3591,11 @@ class GpuSplit(tensor.Split, GpuOp):
class GpuAllocEmpty(GpuOp):
"""Implement Alloc on the gpu, but without initializing memory."""
"""
Implement Alloc on the gpu, but without initializing memory.
"""
__props__ = ()
@staticmethod
......@@ -3595,7 +3674,8 @@ gpu_alloc_empty = GpuAllocEmpty()
class GpuAlloc(GpuAllocEmpty):
"""Implement Alloc on the gpu.
"""
Implement Alloc on the gpu.
The memset_0 param is an optimization. When True, we call
cudaMemset that is faster.
......@@ -3706,8 +3786,10 @@ gpu_alloc = GpuAlloc()
class CopyOnNegativeStrides(GpuOp):
"""
Checks if the input has contains negative strides. If it
does, returns a c contiguous copy.
Checks if the input contains negative strides.
If it does, returns a c contiguous copy.
"""
view_map = {0: [0]}
check_input = False
......@@ -3781,6 +3863,7 @@ class GpuContiguous(GpuOp):
"""
Always return a c contiguous output. Copy the input only if it is
not already c contiguous.
"""
view_map = {0: [0]}
check_input = False
......@@ -3855,9 +3938,16 @@ gpu_contiguous = GpuContiguous()
# Those are predifined CudaNdarrayType as done in tensor.basic
# Useful mostly for test as the gpu op are inserted automatically...
def scalar(name=None, dtype=None):
"""Return a symbolic scalar variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
"""
Return a symbolic scalar variable.
Parameters
----------
dtype
numeric type (None means to use theano.config.floatX).
name : str
A name to attach to this variable.
"""
if dtype is None:
dtype = config.floatX
......@@ -3867,9 +3957,16 @@ fscalar = CudaNdarrayType(dtype='float32', broadcastable=())
def vector(name=None, dtype=None):
"""Return a symbolic vector variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
"""
Return a symbolic vector variable.
Parameters
----------
dtype :
Numeric type (None means to use theano.config.floatX).
name :
A name to attach to this variable.
"""
if dtype is None:
dtype = config.floatX
......@@ -3879,9 +3976,16 @@ fvector = CudaNdarrayType(dtype='float32', broadcastable=(False, ))
def matrix(name=None, dtype=None):
"""Return a symbolic matrix variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
"""
Return a symbolic matrix variable.
Parameters
----------
dtype
Numeric type (None means to use theano.config.floatX).
name
A name to attach to this variable.
"""
if dtype is None:
dtype = config.floatX
......@@ -3891,9 +3995,16 @@ fmatrix = CudaNdarrayType(dtype='float32', broadcastable=(False, False))
def row(name=None, dtype=None):
"""Return a symbolic row variable (ndim=2, broadcastable=[True,False]).
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
"""
Return a symbolic row variable (ndim=2, broadcastable=[True,False]).
Parameters
----------
dtype
Numeric type (None means to use theano.config.floatX).
name : str
A name to attach to this variable.
"""
if dtype is None:
dtype = config.floatX
......@@ -3903,9 +4014,16 @@ frow = CudaNdarrayType(dtype='float32', broadcastable=(True, False))
def col(name=None, dtype=None):
"""Return a symbolic column variable (ndim=2, broadcastable=[False,True]).
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
"""
Return a symbolic column variable (ndim=2, broadcastable=[False,True]).
Parameters
----------
dtype
Numeric type (None means to use theano.config.floatX).
name : str
A name to attach to this variable.
"""
if dtype is None:
dtype = config.floatX
......@@ -3915,9 +4033,16 @@ fcol = CudaNdarrayType(dtype='float32', broadcastable=(False, True))
def tensor3(name=None, dtype=None):
"""Return a symbolic 3-D variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
"""
Return a symbolic 3-D variable.
Parameters
----------
dtype
Numeric type (None means to use theano.config.floatX).
name : str
A name to attach to this variable.
"""
if dtype is None:
dtype = config.floatX
......@@ -3927,9 +4052,16 @@ ftensor3 = CudaNdarrayType(dtype='float32', broadcastable=(False,) * 3)
def tensor4(name=None, dtype=None):
"""Return a symbolic 4-D variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
"""
Return a symbolic 4-D variable.
Parameters
----------
dtype
Numeric type (None means to use theano.config.floatX).
name : str
A name to attach to this variable.
"""
if dtype is None:
dtype = config.floatX
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论