提交 3bf6f4cb authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add a listing of ops for gpuarray.

上级 f005cc21
...@@ -13,4 +13,5 @@ ...@@ -13,4 +13,5 @@
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
op
type type
.. _libdoc_gpuarray_op:
================================
List of gpuarray Ops implemented
================================
.. moduleauthor:: LISA
Normally you should not call those Ops directly! Theano should
automatically transform cpu ops to their gpu equivalent. So this list
is just useful to let people know what is implemented on the gpu.
Basic Op
========
.. automodule:: theano.sandbox.gpuarray.basic_ops
:members:
Blas Op
=======
.. automodule:: theano.sandbox.gpuarray.blas
:members:
.. automodule:: theano.sandbox.gpuarray.nerv
:members:
Elemwise Op
===========
.. automodule:: theano.sandbox.gpuarray.elemwise
:members:
Subtensor Op
============
.. automodule:: theano.sandbox.gpuarray.subtensor
:members:
Nnet Op
=======
.. automodule:: theano.sandbox.gpuarray.nnet
:members:
.. automodule:: theano.sandbox.gpuarray.neighbours
:members:
...@@ -27,6 +27,20 @@ from .fp16_help import write_w ...@@ -27,6 +27,20 @@ from .fp16_help import write_w
def as_gpuarray_variable(x, context_name): def as_gpuarray_variable(x, context_name):
"""
This will attempt to convert `x` into a variable on the GPU.
It can take either a value or another variable. If `x` is already
suitable, it will be returned as-is.
Parameters
----------
x
Object to convert
context_name : str or None
target context name for the result
"""
# If this is already some form of variable, try to avoid an extra transfer # If this is already some form of variable, try to avoid an extra transfer
if isinstance(x, Variable): if isinstance(x, Variable):
while True: while True:
...@@ -174,6 +188,13 @@ class Kernel(object): ...@@ -174,6 +188,13 @@ class Kernel(object):
class GpuKernelBase(object): class GpuKernelBase(object):
"""
Base class for operations that need to compile kernels.
It is not mandatory to use this class, but it helps with a lot of
the small things that you have to pay attention to.
"""
params_type = gpu_context_type params_type = gpu_context_type
def gpu_kernels(self, node, name): def gpu_kernels(self, node, name):
...@@ -274,10 +295,25 @@ class GpuKernelBase(object): ...@@ -274,10 +295,25 @@ class GpuKernelBase(object):
return (self.c_code_cache_version(), self.kernel_version(node)) return (self.c_code_cache_version(), self.kernel_version(node))
def kernel_version(self, node): def kernel_version(self, node):
"""
If you override :meth:`c_code_cache_version_apply`, call this
method to have the version of the kernel support code and
device.
Parameters
----------
node : apply node
The node that we need the cache version for.
"""
return (3, self.get_params(node).bin_id) return (3, self.get_params(node).bin_id)
class HostFromGpu(Op): class HostFromGpu(Op):
"""
Transfer data to CPU.
"""
__props__ = () __props__ = ()
_f16_ok = True _f16_ok = True
...@@ -356,6 +392,10 @@ host_from_gpu = HostFromGpu() ...@@ -356,6 +392,10 @@ host_from_gpu = HostFromGpu()
class GpuFromHost(Op): class GpuFromHost(Op):
"""
Transfer data to GPU.
"""
__props__ = ('context_name',) __props__ = ('context_name',)
_f16_ok = True _f16_ok = True
params_type = gpu_context_type params_type = gpu_context_type
...@@ -443,6 +483,10 @@ class GpuFromHost(Op): ...@@ -443,6 +483,10 @@ class GpuFromHost(Op):
class GpuToGpu(Op): class GpuToGpu(Op):
"""
Transfer data between GPUs.
"""
__props__ = ('context_name',) __props__ = ('context_name',)
_f16_ok = True _f16_ok = True
params_type = gpu_context_type params_type = gpu_context_type
...@@ -494,6 +538,7 @@ class GpuToGpu(Op): ...@@ -494,6 +538,7 @@ class GpuToGpu(Op):
class GpuAlloc(HideC, Alloc): class GpuAlloc(HideC, Alloc):
""" """
Allocate initialized memory on the GPU.
Parameters Parameters
---------- ----------
...@@ -654,6 +699,10 @@ class GpuAlloc(HideC, Alloc): ...@@ -654,6 +699,10 @@ class GpuAlloc(HideC, Alloc):
class GpuAllocEmpty(HideC, Alloc): class GpuAllocEmpty(HideC, Alloc):
"""
Allocate uninitialized memory on the GPU.
"""
__props__ = ('dtype', 'context_name') __props__ = ('dtype', 'context_name')
_f16_ok = True _f16_ok = True
params_type = gpu_context_type params_type = gpu_context_type
...@@ -732,8 +781,10 @@ def empty_like(var): ...@@ -732,8 +781,10 @@ def empty_like(var):
class GpuContiguous(Op): class GpuContiguous(Op):
""" """
Always return a c contiguous output. Copy the input only if it is Return a C contiguous version of the input.
not already c contiguous.
This may either pass the object as-is (if already C contiguous) or
make a copy.
""" """
__props__ = () __props__ = ()
...@@ -793,7 +844,7 @@ gpu_contiguous = GpuContiguous() ...@@ -793,7 +844,7 @@ gpu_contiguous = GpuContiguous()
class GpuReshape(HideC, tensor.Reshape): class GpuReshape(HideC, tensor.Reshape):
""" """
Implement Reshape on the gpu. Reshape for GPU variables.
""" """
...@@ -914,6 +965,10 @@ class GpuReshape(HideC, tensor.Reshape): ...@@ -914,6 +965,10 @@ class GpuReshape(HideC, tensor.Reshape):
class GpuJoin(HideC, Join): class GpuJoin(HideC, Join):
"""
Join for GPU.
"""
_f16_ok = True _f16_ok = True
params_type = gpu_context_type params_type = gpu_context_type
...@@ -991,6 +1046,10 @@ gpu_join = GpuJoin() ...@@ -991,6 +1046,10 @@ gpu_join = GpuJoin()
class GpuSplit(HideC, Split): class GpuSplit(HideC, Split):
"""
Split for GPU.
"""
def make_node(self, x, axis, splits): def make_node(self, x, axis, splits):
node = Split.make_node(self, x, axis, splits) node = Split.make_node(self, x, axis, splits)
x = as_gpuarray_variable(x, infer_context_name(x)) x = as_gpuarray_variable(x, infer_context_name(x))
...@@ -1002,6 +1061,10 @@ class GpuSplit(HideC, Split): ...@@ -1002,6 +1061,10 @@ class GpuSplit(HideC, Split):
class GpuEye(GpuKernelBase, Op): class GpuEye(GpuKernelBase, Op):
"""
Eye for GPU.
"""
__props__ = ('dtype', 'context_name') __props__ = ('dtype', 'context_name')
_f16_ok = True _f16_ok = True
......
...@@ -31,6 +31,10 @@ class BlasOp(Op): ...@@ -31,6 +31,10 @@ class BlasOp(Op):
class GpuGemv(BlasOp): class GpuGemv(BlasOp):
"""
Gemv on the GPU.
"""
__props__ = ('inplace',) __props__ = ('inplace',)
def __init__(self, inplace=False): def __init__(self, inplace=False):
...@@ -107,6 +111,10 @@ gpugemv_inplace = GpuGemv(inplace=True) ...@@ -107,6 +111,10 @@ gpugemv_inplace = GpuGemv(inplace=True)
class GpuGemm(BlasOp): class GpuGemm(BlasOp):
"""
Gemm on the GPU.
"""
__props__ = ('inplace',) __props__ = ('inplace',)
_f16_ok = True _f16_ok = True
...@@ -184,6 +192,10 @@ gpugemm_inplace = GpuGemm(inplace=True) ...@@ -184,6 +192,10 @@ gpugemm_inplace = GpuGemm(inplace=True)
class GpuGer(BlasOp): class GpuGer(BlasOp):
"""
Ger on the GPU.
"""
__props__ = ('inplace',) __props__ = ('inplace',)
def __init__(self, inplace=False): def __init__(self, inplace=False):
...@@ -256,6 +268,10 @@ gpuger_inplace = GpuGer(inplace=True) ...@@ -256,6 +268,10 @@ gpuger_inplace = GpuGer(inplace=True)
class GpuDot22(BlasOp): class GpuDot22(BlasOp):
"""
Dot22 on the GPU.
"""
__props__ = () __props__ = ()
def make_node(self, x, y): def make_node(self, x, y):
......
...@@ -57,6 +57,10 @@ def as_C_string_const(s): ...@@ -57,6 +57,10 @@ def as_C_string_const(s):
class GpuElemwise(GpuKernelBase, HideC, Elemwise): class GpuElemwise(GpuKernelBase, HideC, Elemwise):
"""
Elemwise on the GPU.
"""
nin = property(lambda self: self.scalar_op.nin) nin = property(lambda self: self.scalar_op.nin)
nout = property(lambda self: self.scalar_op.nout) nout = property(lambda self: self.scalar_op.nout)
_f16_ok = True _f16_ok = True
...@@ -445,6 +449,10 @@ class SupportCodeError(Exception): ...@@ -445,6 +449,10 @@ class SupportCodeError(Exception):
class GpuDimShuffle(HideC, DimShuffle): class GpuDimShuffle(HideC, DimShuffle):
"""
DimShuffle on the GPU.
"""
_f16_ok = True _f16_ok = True
def make_node(self, input): def make_node(self, input):
...@@ -548,7 +556,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -548,7 +556,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
Parameters Parameters
---------- ----------
reduce-mask reduce_mask
The dimensions along which to reduce. The `reduce_mask` is a tuple of The dimensions along which to reduce. The `reduce_mask` is a tuple of
booleans (actually integers 0 or 1) that specify for each input booleans (actually integers 0 or 1) that specify for each input
dimension, whether to reduce it (1) or not (0). dimension, whether to reduce it (1) or not (0).
...@@ -1279,14 +1287,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1279,14 +1287,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
""" % locals() """ % locals()
def c_code_reduce_ccontig(self, sio, node, name, x, z, fail): def c_code_reduce_ccontig(self, sio, node, name, x, z, fail):
"""
WRITEME
IG: I believe, based on how this is called in c_code, that it
is for the case where we are reducing on all axes and x is
C contiguous.
"""
in_dtype = "npy_" + node.inputs[0].dtype in_dtype = "npy_" + node.inputs[0].dtype
out_dtype = "npy_" + node.outputs[0].dtype out_dtype = "npy_" + node.outputs[0].dtype
if getattr(self.scalar_op, 'identity', None) == 0: if getattr(self.scalar_op, 'identity', None) == 0:
...@@ -2666,8 +2666,6 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2666,8 +2666,6 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
""" """
CAReduce that reuses the python code from gpuarray. CAReduce that reuses the python code from gpuarray.
Too slow for now as it only have a python interface.
""" """
def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None): def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None):
if not hasattr(scalar_op, 'identity'): if not hasattr(scalar_op, 'identity'):
......
...@@ -17,6 +17,10 @@ from .type import GpuArrayType ...@@ -17,6 +17,10 @@ from .type import GpuArrayType
class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
"""
Images2Neibs for the GPU.
"""
def __init__(self, mode='valid'): def __init__(self, mode='valid'):
if mode not in ['valid', 'ignore_borders', 'wrap_centered']: if mode not in ['valid', 'ignore_borders', 'wrap_centered']:
raise NotImplementedError("Only the mode valid, ignore_borders" raise NotImplementedError("Only the mode valid, ignore_borders"
......
...@@ -41,6 +41,9 @@ def ensure_float(val, name): ...@@ -41,6 +41,9 @@ def ensure_float(val, name):
class Gemm16(COp): class Gemm16(COp):
"""
Gemm for float16 using the Nervana kernels.
"""
__props__ = ('relu', 'inplace') __props__ = ('relu', 'inplace')
_f16_ok = True _f16_ok = True
params_type = gpu_context_type params_type = gpu_context_type
......
...@@ -24,6 +24,9 @@ from .elemwise import GpuElemwise ...@@ -24,6 +24,9 @@ from .elemwise import GpuElemwise
class GpuSubtensor(HideC, Subtensor): class GpuSubtensor(HideC, Subtensor):
"""
Subtensor on the GPU.
"""
_f16_ok = True _f16_ok = True
def make_node(self, x, *inputs): def make_node(self, x, *inputs):
...@@ -173,8 +176,8 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor): ...@@ -173,8 +176,8 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
The optimization to make this inplace is in tensor/opt. The optimization to make this inplace is in tensor/opt.
The same optimization handles IncSubtensor and GpuIncSubtensor. The same optimization handles IncSubtensor and GpuIncSubtensor.
This Op has c_code too; it inherits tensor.IncSubtensor's c_code. This Op has c_code too; it inherits tensor.IncSubtensor's c_code.
The helper methods like do_type_checking, copy_of_x, etc. specialize The helper methods like :meth:`do_type_checking`,
the c_code for this Op. :meth:`copy_of_x`, etc. specialize the c_code for this Op.
""" """
...@@ -405,6 +408,9 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor): ...@@ -405,6 +408,9 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1): class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
"""
AdvancedSubtensor1 on the GPU.
"""
def make_node(self, x, ilist): def make_node(self, x, ilist):
ctx_name = infer_context_name(x, ilist) ctx_name = infer_context_name(x, ilist)
x_ = as_gpuarray_variable(x, ctx_name) x_ = as_gpuarray_variable(x, ctx_name)
...@@ -580,8 +586,10 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1): ...@@ -580,8 +586,10 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
_f16_ok = True _f16_ok = True
def make_node(self, x, y, ilist): def make_node(self, x, y, ilist):
"""It defer from GpuAdvancedIncSubtensor1 in that it make sure """
the index are of type long. It differs from GpuAdvancedIncSubtensor1 in that it makes sure
the indexes are of type long.
""" """
ctx_name = infer_context_name(x, y, ilist) ctx_name = infer_context_name(x, y, ilist)
x_ = as_gpuarray_variable(x, ctx_name) x_ = as_gpuarray_variable(x, ctx_name)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论