提交 257d4b5f authored 作者: Iban Harlouchet's avatar Iban Harlouchet

numpydoc for theano/sandbox/cuda/basic_ops.py

上级 a663afde
......@@ -59,6 +59,7 @@ def as_cuda_array(obj):
class HostFromGpu(GpuOp):
"""
Implement the transfer from gpu to the cpu.
"""
check_input = False
......@@ -118,6 +119,7 @@ host_from_gpu = HostFromGpu()
class GpuFromHost(GpuOp):
"""
Implement the transfer from cpu to the gpu.
"""
check_input = False
......@@ -185,6 +187,7 @@ gpu_from_host = GpuFromHost()
class GpuElemwise(GpuOp):
"""
Implement a generic elemwise on the gpu.
"""
nin = property(lambda self: self.scalar_op.nin)
nout = property(lambda self: self.scalar_op.nout)
......@@ -316,6 +319,7 @@ class GpuElemwise(GpuOp):
class GpuDimShuffle(GpuOp):
"""
Implement DimShuffle on the gpu.
"""
check_broadcast = False
......@@ -523,39 +527,47 @@ class GpuDimShuffle(GpuOp):
class GpuCAReduce(GpuOp):
"""GpuCAReduce is a Reduction along some dimensions by a scalar op.
"""
GpuCAReduce is a Reduction along some dimensions by a scalar op.
The dimensions along which to reduce is specified by the
`reduce_mask` that you pass to the constructor. The `reduce_mask`
is a tuple of booleans (actually integers 0 or 1) that specify for
each input dimension, whether to reduce it (1) or not (0).
For example, when scalar_op is a theano.scalar.basic.Add instance:
- reduce_mask == (1,) sums a vector to a scalar
- reduce_mask == (1,0) computes the sum of each column in a matrix
- reduce_mask == (0,1) computes the sum of each row in a matrix
- reduce_mask == (1,1,1) computes the sum of all elements in a 3-tensor.
:note: any reduce_mask of all zeros is a sort of 'copy', and may
be removed during graph optimization
Parameters
----------
pre_scalar_op
If present, must be a scalar op with only 1 input.
We will execute it on the input value before reduction.
Notes
-----
This Op is a work in progress.
This op was recently upgraded from just GpuSum to a general CAReduce. Not
many code cases are supported for scalar_op being anything other than
scal.Add instances yet.
scal.Add instances yet.
Important note: if you implement new cases for this op, be sure to
benchmark them and make sure that they actually result in a speedup.
GPUs are not especially well-suited to reduction operations so it is
quite possible that the GPU might be slower for some cases.
pre_scalar_op: if present, must be a scalar op with only 1
input. We will execute it on the input value before reduction.
Examples
--------
When scalar_op is a theano.scalar.basic.Add instance:
- reduce_mask == (1,) sums a vector to a scalar
- reduce_mask == (1,0) computes the sum of each column in a matrix
- reduce_mask == (0,1) computes the sum of each row in a matrix
- reduce_mask == (1,1,1) computes the sum of all elements in a 3-tensor.
.. note:: Any reduce_mask of all zeros is a sort of 'copy', and may
be removed during graph optimization.
"""
......@@ -620,8 +632,10 @@ class GpuCAReduce(GpuOp):
"""
def supports_c_code(self, inputs):
""" Returns True if the current op and reduce pattern
has functioning C code """
"""
Returns True if the current op and reduce pattern has functioning C code.
"""
# If we don't even have the right method, we certainly
# don't support the C code
......@@ -781,9 +795,10 @@ class GpuCAReduce(GpuOp):
return sio.getvalue()
def _makecall(self, node, name, x, z, fail, pattern=None):
"""Return a string for making a kernel call.
"""
Return a string for making a kernel call.
The return value looks something like:
The return value looks something like:
.. code-block:: c
......@@ -806,6 +821,7 @@ class GpuCAReduce(GpuOp):
PyErr_Format(PyExc_RuntimeError, "Cuda error: ... );
%(fail)s;
}
"""
sio = StringIO()
if pattern is None:
......@@ -874,7 +890,8 @@ class GpuCAReduce(GpuOp):
def _k_decl(self, node, nodename, pattern=None,
ndim=None, reduce_mask=None):
"""Return a string to declare a kernel function
"""
Return a string to declare a kernel function.
The result will look something like this:
......@@ -953,6 +970,7 @@ class GpuCAReduce(GpuOp):
Otherwise, check that the scalar op is maximum or minimum
and return first_item. It should be the first element of the reduction.
As the maximum and minimum of the same value don't change, this works.
"""
if hasattr(self.scalar_op, 'identity'):
return str(self.scalar_op.identity)
......@@ -980,16 +998,26 @@ class GpuCAReduce(GpuOp):
def _assign_reduce(self, node, name, left, right, sub, pre):
"""
node: the node argument to this op's c_code
name: the name argument to this op's c_code
left: a C code string identifying an lvalue
right: a C code string identifying an expression
sub: the sub argument to this op's c_code
pre: If True, we will add the pre_scalar_op.c_code
returns C code to reduce left and right, assigning the
result to left."""
Parameters
----------
node
The node argument to this op's c_code.
name
The name argument to this op's c_code.
left
A C code string identifying an lvalue.
right
A C code string identifying an expression.
sub
The sub argument to this op's c_code.
pre
If True, we will add the pre_scalar_op.c_code.
Returns
-------
C code to reduce left and right, assigning the result to left.
"""
x, = node.inputs
dtype = x.dtype
......@@ -1019,8 +1047,11 @@ class GpuCAReduce(GpuOp):
"""
WRITEME
Parameters
----------
node, name, sub: these should be passed through from the original
call to c_code
"""
# This code (the code in new_version) is currently ignored.
......@@ -1161,6 +1192,7 @@ class GpuCAReduce(GpuOp):
IG: I believe, based on how this is called in c_code, that it
is for the case where we are reducing on all axes and x is
C contiguous.
"""
if getattr(self.scalar_op, 'identity', None) == 0:
zero_shp = "cudaMemset(%(z)s->devdata, 0, CudaNdarray_SIZE(%(z)s) * sizeof(float))" % locals()
......@@ -1243,8 +1275,14 @@ class GpuCAReduce(GpuOp):
def c_code_reduce_01X(self, sio, node, name, x, z, fail, N):
"""
:param N: the number of 1 in the pattern N=1 -> 01, N=2 -> 011 N=3 ->0111
Work for N=1,2,3
Parameters
----------
N : int
The number of 1s in the pattern:
N=1 -> 01, N=2 -> 011, N=3 -> 0111.
Works for N=1,2,3.
"""
assert N in [1, 2, 3]
......@@ -2395,6 +2433,7 @@ class GpuCAReduce(GpuOp):
class GpuReshape(tensor.Reshape, GpuOp):
"""
Implement Reshape on the gpu.
"""
# __hash__, __eq__, __str__ come from tensor.Subtensor
def make_node(self, x, shp):
......@@ -2541,6 +2580,7 @@ class GpuReshape(tensor.Reshape, GpuOp):
class GpuSubtensor(GpuOp, tensor.Subtensor):
"""
Implement subtensor on the gpu.
"""
check_broadcast = False
......@@ -2647,6 +2687,7 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
"""
Implement AdvancedSubtensor1 on the gpu.
"""
# If True or False, we assert that we use the take version or not
# If None, we choose the best one applicable
......@@ -2762,6 +2803,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
"""
Implement AdvancedIncSubtensor1 on the gpu.
"""
def make_node(self, x, y, ilist):
x_ = as_cuda_ndarray_variable(x)
......@@ -2936,13 +2978,17 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
"""Implement AdvancedIncSubtensor1 on the gpu, but use function
"""
Implement AdvancedIncSubtensor1 on the gpu, but use functions
only available on compute capability 2.0 and more recent.
"""
def make_node(self, x, y, ilist):
"""It defer from GpuAdvancedIncSubtensor1 in that it make sure
"""
It differs from GpuAdvancedIncSubtensor1 in that it makes sure
the indices are of type long.
"""
x_ = as_cuda_ndarray_variable(x)
y_ = as_cuda_ndarray_variable(y)
......@@ -3132,11 +3178,14 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
Implement IncSubtensor on the gpu.
Note: The optimization to make this inplace is in tensor/opt.
The same optimization handles IncSubtensor and GpuIncSubtensor.
This Op has c_code too; it inherits tensor.IncSubtensor's c_code.
The helper methods like do_type_checking, copy_of_x, etc. specialize
the c_code for this Op.
Notes
-----
The optimization to make this inplace is in tensor/opt.
The same optimization handles IncSubtensor and GpuIncSubtensor.
This Op has c_code too; it inherits tensor.IncSubtensor's c_code.
The helper methods like do_type_checking, copy_of_x, etc. specialize
the c_code for this Op.
"""
def make_node(self, x, y, *inputs):
......@@ -3146,22 +3195,31 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
return Apply(self, [x, y] + rval.inputs[2:], [x.type()])
def do_type_checking(self, node):
""" Should raise NotImplementedError if c_code does not support
"""
Should raise NotImplementedError if c_code does not support
the types involved in this node.
"""
"""
if not isinstance(node.inputs[0].type, CudaNdarrayType):
raise NotImplementedError()
def copy_of_x(self, x):
"""
:param x: a string giving the name of a C variable
pointing to an array
:return: C code expression to make a copy of x
Parameters
----------
x : str
A string giving the name of a C variable pointing to an array.
Returns
-------
C code expression to make a copy of x.
Notes
-----
Base class uses `PyArrayObject *`, subclasses may override for
different types of arrays.
Base class uses `PyArrayObject *`, subclasses may override for
different types of arrays.
"""
return """(CudaNdarray*) CudaNdarray_Copy(%(x)s)""" % locals()
......@@ -3170,12 +3228,16 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
def make_view_array(self, x, view_ndim):
"""
:param x: a string identifying an array to be viewed
:param view_ndim: a string specifying the number of dimensions
to have in the view
Parameters
----------
x : str
A string identifying an array to be viewed.
view_ndim : str
A string specifying the number of dimensions to have in the view.
This doesn't need to actually set up the view with the
right indexing; we'll do that manually later.
"""
ret = """zview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
if (CudaNdarray_set_device_data(
......@@ -3201,18 +3263,28 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
return ret
def get_helper_c_code_args(self):
""" Return a dictionary of arguments to use with helper_c_code"""
"""
Return a dictionary of arguments to use with helper_c_code.
"""
return {'c_prefix': 'CudaNdarray',
'strides_mul': 4
}
def copy_into(self, view, source):
"""
view: string, C code expression for an array
source: string, C code expression for an array
returns a C code expression to copy source into view, and
return 0 on success
Parameters
----------
view : str
C code expression for an array.
source : str
C code expression for an array.
Returns
-------
A C code expression to copy source into view, and 0 on success.
"""
# On the CPU it unbroadcast based on the run time shapes. We
# need the same behavior on the GPU.
......@@ -3245,6 +3317,7 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
class GpuFlatten(gof.HideC, tensor.Flatten, GpuOp):
"""
Implement Flatten on the gpu.
"""
def make_node(self, x):
assert isinstance(x.type, CudaNdarrayType)
......@@ -3257,6 +3330,7 @@ class GpuFlatten(gof.HideC, tensor.Flatten, GpuOp):
class GpuShape(tensor.Shape, GpuOp):
"""
Implement Shape on the gpu.
"""
def make_node(self, x):
return Apply(self, [x], [tensor.lvector()])
......@@ -3266,6 +3340,7 @@ gpu_shape = GpuShape()
class GpuJoin(tensor.Join, GpuOp):
"""
Implement Join on the gpu.
"""
def make_node(self, *axis_and_tensors):
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
......@@ -3516,7 +3591,11 @@ class GpuSplit(tensor.Split, GpuOp):
class GpuAllocEmpty(GpuOp):
"""Implement Alloc on the gpu, but without initializing memory."""
"""
Implement Alloc on the gpu, but without initializing memory.
"""
__props__ = ()
@staticmethod
......@@ -3595,7 +3674,8 @@ gpu_alloc_empty = GpuAllocEmpty()
class GpuAlloc(GpuAllocEmpty):
"""Implement Alloc on the gpu.
"""
Implement Alloc on the gpu.
The memset_0 param is an optimization. When True, we call
cudaMemset that is faster.
......@@ -3706,8 +3786,10 @@ gpu_alloc = GpuAlloc()
class CopyOnNegativeStrides(GpuOp):
"""
Checks if the input has contains negative strides. If it
does, returns a c contiguous copy.
Checks if the input contains negative strides.
If it does, returns a c contiguous copy.
"""
view_map = {0: [0]}
check_input = False
......@@ -3781,6 +3863,7 @@ class GpuContiguous(GpuOp):
"""
Always return a c contiguous output. Copy the input only if it is
not already c contiguous.
"""
view_map = {0: [0]}
check_input = False
......@@ -3855,9 +3938,16 @@ gpu_contiguous = GpuContiguous()
# Those are predifined CudaNdarrayType as done in tensor.basic
# Useful mostly for test as the gpu op are inserted automatically...
def scalar(name=None, dtype=None):
"""Return a symbolic scalar variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
"""
Return a symbolic scalar variable.
Parameters
----------
dtype
numeric type (None means to use theano.config.floatX).
name : str
A name to attach to this variable.
"""
if dtype is None:
dtype = config.floatX
......@@ -3867,9 +3957,16 @@ fscalar = CudaNdarrayType(dtype='float32', broadcastable=())
def vector(name=None, dtype=None):
"""Return a symbolic vector variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
"""
Return a symbolic vector variable.
Parameters
----------
dtype :
Numeric type (None means to use theano.config.floatX).
name :
A name to attach to this variable.
"""
if dtype is None:
dtype = config.floatX
......@@ -3879,9 +3976,16 @@ fvector = CudaNdarrayType(dtype='float32', broadcastable=(False, ))
def matrix(name=None, dtype=None):
"""Return a symbolic matrix variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
"""
Return a symbolic matrix variable.
Parameters
----------
dtype
Numeric type (None means to use theano.config.floatX).
name
A name to attach to this variable.
"""
if dtype is None:
dtype = config.floatX
......@@ -3891,9 +3995,16 @@ fmatrix = CudaNdarrayType(dtype='float32', broadcastable=(False, False))
def row(name=None, dtype=None):
"""Return a symbolic row variable (ndim=2, broadcastable=[True,False]).
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
"""
Return a symbolic row variable (ndim=2, broadcastable=[True,False]).
Parameters
----------
dtype
Numeric type (None means to use theano.config.floatX).
name : str
A name to attach to this variable.
"""
if dtype is None:
dtype = config.floatX
......@@ -3903,9 +4014,16 @@ frow = CudaNdarrayType(dtype='float32', broadcastable=(True, False))
def col(name=None, dtype=None):
"""Return a symbolic column variable (ndim=2, broadcastable=[False,True]).
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
"""
Return a symbolic column variable (ndim=2, broadcastable=[False,True]).
Parameters
----------
dtype
Numeric type (None means to use theano.config.floatX).
name : str
A name to attach to this variable.
"""
if dtype is None:
dtype = config.floatX
......@@ -3915,9 +4033,16 @@ fcol = CudaNdarrayType(dtype='float32', broadcastable=(False, True))
def tensor3(name=None, dtype=None):
"""Return a symbolic 3-D variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
"""
Return a symbolic 3-D variable.
Parameters
----------
dtype
Numeric type (None means to use theano.config.floatX).
name : str
A name to attach to this variable.
"""
if dtype is None:
dtype = config.floatX
......@@ -3927,9 +4052,16 @@ ftensor3 = CudaNdarrayType(dtype='float32', broadcastable=(False,) * 3)
def tensor4(name=None, dtype=None):
"""Return a symbolic 4-D variable.
:param dtype: numeric type (None means to use theano.config.floatX)
:param name: a name to attach to this variable
"""
Return a symbolic 4-D variable.
Parameters
----------
dtype
Numeric type (None means to use theano.config.floatX).
name : str
A name to attach to this variable.
"""
if dtype is None:
dtype = config.floatX
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论