提交 49af6efe authored 作者: Iban Harlouchet's avatar Iban Harlouchet

numpydoc for theano/sandbox/cuda/opt.py

上级 e9235e29
......@@ -141,7 +141,9 @@ class InputToGpuOptimizer(Optimizer):
Transfer the input of a graph to the gpu if it is necessary.
It should make this part of the optimizer faster; we will need only 1
pass on the fgraph.
"""
def __init__(self):
Optimizer.__init__(self)
......@@ -208,7 +210,10 @@ def dtype_in_elemwise_supported(op):
Return True if the Elemwise op is supported on the gpu.
Return False otherwise.
:note: We need to check inside the Composite op.
Notes
-----
We need to check inside the Composite op.
"""
def get_all_basic_scalar(composite_op):
l = []
......@@ -231,8 +236,10 @@ def dtype_in_elemwise_supported(op):
@register_opt()
@local_optimizer([tensor.Elemwise])
def local_gpu_elemwise_0(node):
"""elemwise(..., host_from_gpu, ...)
-> host_from_gpu(elemwise(gpu_from_host, ..., gpu_from_host)
"""
Elemwise(..., host_from_gpu, ...)
-> host_from_gpu(elemwise(gpu_from_host, ..., gpu_from_host))
"""
if (isinstance(node.op, tensor.Elemwise) and
dtype_in_elemwise_supported(node.op)):
......@@ -294,6 +301,7 @@ def local_gpu_elemwise_0(node):
def local_gpu_elemwise_1(node):
"""
gpu_from_host(Elemwise)) -> GpuElemwise(gpu_from_host(...))
"""
if isinstance(node.op, GpuFromHost):
host_i, = node.inputs
......@@ -350,6 +358,7 @@ def local_gpu_dimshuffle_0(node):
"""
dimshuffle(host_from_gpu()) -> host_from_gpu(gpu_dimshuffle)
gpu_from_host(dimshuffle) -> gpu_dimshuffle(gpu_from_host)
"""
if isinstance(node.op, tensor.DimShuffle):
input, = node.inputs
......@@ -375,6 +384,7 @@ def local_gpu_specifyShape_0(node):
"""
specify_shape(host_from_gpu()) -> host_from_gpu(specify_shape)
gpu_from_host(specify_shape) -> specify_shape(gpu_from_host)
"""
if isinstance(node.op, tensor.SpecifyShape):
input = node.inputs[0]
......@@ -403,11 +413,11 @@ def local_gpu_dot_to_dot22(node):
transforming the vector into a matrix, applying gpudot22 and reshaping
the output.
A more suitable solution would be to use the right cublas call
A more suitable solution would be to use the right cublas call.
This is needed in fast_compile
"""
This is needed in fast_compile.
"""
# In case the dot did an input upcast, we must check that we can
# make it run on the gpu.
if isinstance(node.op, GpuFromHost):
......@@ -482,10 +492,11 @@ theano.compile.optdb.register('assert_no_cpu_op', assert_no_cpu_op, 49.2)
@register_opt()
@local_optimizer([theano.ifelse.IfElse, gpu_from_host])
def local_gpu_lazy_ifelse(node):
"""
"""
gpu_from_host(ifelse) -> gpu_ifelse(gpu_from_host)
ifelse(host_from_gpu) -> host_from_gpu(ifelse)
"""
if isinstance(node.op, theano.ifelse.IfElse) and not node.op.gpu:
gpu_ifelse = theano.ifelse.IfElse(node.op.n_outs, gpu=True)
......@@ -554,6 +565,7 @@ def local_gpu_dot22(node):
gpu_from_host(dot22) -> gpudot(gpu_from_host)
dot(host_from_gpu) -> host_from_gpu(gpudot22)
"""
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
......@@ -577,6 +589,7 @@ def local_gpu_dot22scalar(node):
gpu_from_host(dot22scalar) -> gpudot(gpu_from_host)
dot(host_from_gpu) -> host_from_gpu(gpudot22scalar)
"""
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
......@@ -602,7 +615,9 @@ def local_gpu_dot22scalar(node):
def local_gpu_solve(node):
"""
gpu_from_host(CpuSolve) -> GpuSolve(gpu_from_host)
CpuSolve(host_from_gpu) -> host_from_gpu(GpuSolve)
"""
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
......@@ -627,6 +642,7 @@ def local_gpu_solve(node):
def local_gpu_gemv(node):
"""
gpu_from_host(gemv) -> gpu_gemv(gpu_from_host)
gemv(host_from_gpu) -> host_from_gpu(gpu_gemv)
"""
......@@ -665,6 +681,7 @@ def local_gpu_gemv(node):
def local_gpu_ger(node):
"""
gpu_from_host(ger) -> gpu_ger(gpu_from_host)
ger(host_from_gpu) -> host_from_gpu(gpu_ger)
"""
......@@ -706,6 +723,7 @@ def local_gpu_gemm(node):
gpu_from_host(gemm) -> gpu_gemm(gpu_from_host)
gemm(host_from_gpu) -> host_from_gpu(gpu_gemm)
"""
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
......@@ -1120,7 +1138,10 @@ def local_gpu_shape(node):
@register_opt()
@local_optimizer([tensor.Rebroadcast])
def local_gpu_rebroadcast(node):
'''rebroadcast(host_from_gpu(x)) -> host_from_gpu(rebroadcast(x))'''
"""
rebroadcast(host_from_gpu(x)) -> host_from_gpu(rebroadcast(x))
"""
if isinstance(node.op, tensor.Rebroadcast):
x, = node.inputs
if (x.owner and isinstance(x.owner.op, HostFromGpu)):
......@@ -1342,7 +1363,8 @@ def local_conv_fft_full(node):
def values_eq_approx_high_tol(a, b):
"""This fct is needed to don't have DebugMode raise useless
"""
This function is needed so that DebugMode does not raise useless
errors due to rounding error.
This happens as we reduce on the two last dimensions, so this
......@@ -1364,6 +1386,7 @@ def local_gpu_conv(node):
gpu_from_host(conv) -> gpu_conv(gpu_from_host)
conv(host_from_gpu) -> host_from_gpu(gpu_conv)
"""
def GpuConvOp_from_ConvOp(op):
logical_img_hw = None
......@@ -1534,7 +1557,10 @@ conv_groupopt.register('local_conv_gemm', local_conv_gemm, 30,
class LocalCudaMetaOptimizer(LocalMetaOptimizer):
"""Base class for CUDA-based LocalMetaOptimizers"""
"""
Base class for CUDA-based LocalMetaOptimizers.
"""
def time_call(self, fn):
# Override time_call() to do device synchronization
......@@ -1827,7 +1853,6 @@ def local_gpu_join(node):
by other opts, leaving us with
host_from_gpu(gpu_join)
For intermediate places in the graph not covered by the first opt, the
following could be useful:
......@@ -1911,8 +1936,12 @@ optdb.register('InplaceGpuBlasOpt',
def get_device_type_sizes():
"""
:return:(gpu ptr size, cpu ptr size, int sizes(gpu and cpu))
:return type: tuple
Returns
-------
tuple
(gpu ptr size, cpu ptr size, int sizes(gpu and cpu)).
"""
if hasattr(get_device_type_sizes, 'rval'):
return get_device_type_sizes.rval
......@@ -1941,7 +1970,7 @@ def get_device_type_sizes():
def max_inputs_to_GpuElemwise(node):
"""
return the maximum number of inputs this GpuElemwise Apply node can
Return the maximum number of inputs this GpuElemwise Apply node can
accept.
This is needed as currently there is a limit of 256 bytes of
......@@ -1950,8 +1979,8 @@ def max_inputs_to_GpuElemwise(node):
2.x (not used).
This measures the number of parameters we put in our GPU function and
computes the maximum number of inputs that respect the 256 byte
limit.
computes the maximum number of inputs that respect the 256 byte limit.
"""
type_sizes = get_device_type_sizes()
int_size = type_sizes['int_size']
......@@ -1986,6 +2015,7 @@ def split_huge_add_or_mul(node):
This should not happen for other GpuElemwise ops, as there is only the fusion
that can generate ops with too many inputs, and it checks for that.
"""
if node.op.scalar_op in (scal.add, scal.mul):
max_nb_inputs = max_inputs_to_GpuElemwise(node)
......@@ -2135,6 +2165,7 @@ def local_gpu_eye(node):
gpu_from_host(eye) -> gpueye(gpu_from_host)
eye(host_from_gpu) -> host_from_gpu(gpueye)
"""
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
......@@ -2167,10 +2198,11 @@ def safe_to_cpu(x):
def gpu_safe_new(x, tag=''):
"""
Internal function that constructs a new variable from x with the same
type, but with a different name ( old name + tag). This function is used
type, but with a different name (old name + tag). This function is used
by gradient, or the R-op to construct new variables for the inputs of
the inner graph such that there is no interference between the original
graph and the newly constructed graph.
"""
if hasattr(x, 'name') and x.name is not None:
nw_name = x.name + tag
......@@ -2188,8 +2220,9 @@ def gpu_reconstruct_graph(inputs, outputs, tag=None):
"""
Different interface to clone, that allows you to pass inputs.
Compared to clone, this method always replaces the inputs with
new variables of the same type, and returns those ( in the same
new variables of the same type, and returns those (in the same
order as the original inputs).
"""
if tag is None:
tag = ''
......@@ -2217,7 +2250,9 @@ def tensor_to_cuda(x):
def local_gpu_extract_diagonal(node):
"""
extract_diagonal(host_from_gpu()) -> host_from_gpu(extract_diagonal)
gpu_from_host(extract_diagonal) -> extract_diagonal(gpu_from_host)
"""
if (isinstance(node.op, nlinalg.ExtractDiag) and
isinstance(node.inputs[0].type,
......@@ -2249,9 +2284,10 @@ def typeConstructor(broadcastable, dtype):
def gpuScanOptimization(node):
"""
scan(host_from_gpu) -> host_from_gpu(GPUscan)
gpu_from_host(scan) -> GPUscan(gpu_from_host)
"""
"""
# gpu_from_host(scan) -> GPUscan(gpu_from_host)
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论