提交 3bf6f4cb authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add a listing of ops for gpuarray.

上级 f005cc21
......@@ -13,4 +13,5 @@
.. toctree::
:maxdepth: 1
op
type
.. _libdoc_gpuarray_op:
================================
List of gpuarray Ops implemented
================================
.. moduleauthor:: LISA
Normally you should not call these Ops directly! Theano should
automatically transform CPU ops to their GPU equivalents. So this list
is just useful to let people know what is implemented on the GPU.
Basic Op
========
.. automodule:: theano.sandbox.gpuarray.basic_ops
:members:
Blas Op
=======
.. automodule:: theano.sandbox.gpuarray.blas
:members:
.. automodule:: theano.sandbox.gpuarray.nerv
:members:
Elemwise Op
===========
.. automodule:: theano.sandbox.gpuarray.elemwise
:members:
Subtensor Op
============
.. automodule:: theano.sandbox.gpuarray.subtensor
:members:
Nnet Op
=======
.. automodule:: theano.sandbox.gpuarray.nnet
:members:
.. automodule:: theano.sandbox.gpuarray.neighbours
:members:
......@@ -27,6 +27,20 @@ from .fp16_help import write_w
def as_gpuarray_variable(x, context_name):
"""
This will attempt to convert `x` into a variable on the GPU.
It can take either a value or another variable. If `x` is already
suitable, it will be returned as-is.
Parameters
----------
x
Object to convert
context_name : str or None
target context name for the result
"""
# If this is already some form of variable, try to avoid an extra transfer
if isinstance(x, Variable):
while True:
......@@ -174,6 +188,13 @@ class Kernel(object):
class GpuKernelBase(object):
"""
Base class for operations that need to compile kernels.
It is not mandatory to use this class, but it helps with a lot of
the small things that you have to pay attention to.
"""
params_type = gpu_context_type
def gpu_kernels(self, node, name):
......@@ -274,10 +295,25 @@ class GpuKernelBase(object):
return (self.c_code_cache_version(), self.kernel_version(node))
def kernel_version(self, node):
"""
If you override :meth:`c_code_cache_version_apply`, call this
method to get the version of the kernel support code and
device.
Parameters
----------
node : apply node
The node that we need the cache version for.
"""
return (3, self.get_params(node).bin_id)
class HostFromGpu(Op):
"""
Transfer data to CPU.
"""
__props__ = ()
_f16_ok = True
......@@ -356,6 +392,10 @@ host_from_gpu = HostFromGpu()
class GpuFromHost(Op):
"""
Transfer data to GPU.
"""
__props__ = ('context_name',)
_f16_ok = True
params_type = gpu_context_type
......@@ -443,6 +483,10 @@ class GpuFromHost(Op):
class GpuToGpu(Op):
"""
Transfer data between GPUs.
"""
__props__ = ('context_name',)
_f16_ok = True
params_type = gpu_context_type
......@@ -494,6 +538,7 @@ class GpuToGpu(Op):
class GpuAlloc(HideC, Alloc):
"""
Allocate initialized memory on the GPU.
Parameters
----------
......@@ -654,6 +699,10 @@ class GpuAlloc(HideC, Alloc):
class GpuAllocEmpty(HideC, Alloc):
"""
Allocate uninitialized memory on the GPU.
"""
__props__ = ('dtype', 'context_name')
_f16_ok = True
params_type = gpu_context_type
......@@ -732,8 +781,10 @@ def empty_like(var):
class GpuContiguous(Op):
"""
Always return a c contiguous output. Copy the input only if it is
not already c contiguous.
Return a C contiguous version of the input.
This may either pass the object as-is (if already C contiguous) or
make a copy.
"""
__props__ = ()
......@@ -793,7 +844,7 @@ gpu_contiguous = GpuContiguous()
class GpuReshape(HideC, tensor.Reshape):
"""
Implement Reshape on the gpu.
Reshape for GPU variables.
"""
......@@ -914,6 +965,10 @@ class GpuReshape(HideC, tensor.Reshape):
class GpuJoin(HideC, Join):
"""
Join for GPU.
"""
_f16_ok = True
params_type = gpu_context_type
......@@ -991,6 +1046,10 @@ gpu_join = GpuJoin()
class GpuSplit(HideC, Split):
"""
Split for GPU.
"""
def make_node(self, x, axis, splits):
node = Split.make_node(self, x, axis, splits)
x = as_gpuarray_variable(x, infer_context_name(x))
......@@ -1002,6 +1061,10 @@ class GpuSplit(HideC, Split):
class GpuEye(GpuKernelBase, Op):
"""
Eye for GPU.
"""
__props__ = ('dtype', 'context_name')
_f16_ok = True
......
......@@ -31,6 +31,10 @@ class BlasOp(Op):
class GpuGemv(BlasOp):
"""
Gemv on the GPU.
"""
__props__ = ('inplace',)
def __init__(self, inplace=False):
......@@ -107,6 +111,10 @@ gpugemv_inplace = GpuGemv(inplace=True)
class GpuGemm(BlasOp):
"""
Gemm on the GPU.
"""
__props__ = ('inplace',)
_f16_ok = True
......@@ -184,6 +192,10 @@ gpugemm_inplace = GpuGemm(inplace=True)
class GpuGer(BlasOp):
"""
Ger on the GPU.
"""
__props__ = ('inplace',)
def __init__(self, inplace=False):
......@@ -256,6 +268,10 @@ gpuger_inplace = GpuGer(inplace=True)
class GpuDot22(BlasOp):
"""
Dot22 on the GPU.
"""
__props__ = ()
def make_node(self, x, y):
......
......@@ -57,6 +57,10 @@ def as_C_string_const(s):
class GpuElemwise(GpuKernelBase, HideC, Elemwise):
"""
Elemwise on the GPU.
"""
nin = property(lambda self: self.scalar_op.nin)
nout = property(lambda self: self.scalar_op.nout)
_f16_ok = True
......@@ -445,6 +449,10 @@ class SupportCodeError(Exception):
class GpuDimShuffle(HideC, DimShuffle):
"""
DimShuffle on the GPU.
"""
_f16_ok = True
def make_node(self, input):
......@@ -548,7 +556,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
Parameters
----------
reduce-mask
reduce_mask
The dimensions along which to reduce. The `reduce_mask` is a tuple of
booleans (actually integers 0 or 1) that specify for each input
dimension, whether to reduce it (1) or not (0).
......@@ -1279,14 +1287,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
""" % locals()
def c_code_reduce_ccontig(self, sio, node, name, x, z, fail):
"""
WRITEME
IG: I believe, based on how this is called in c_code, that it
is for the case where we are reducing on all axes and x is
C contiguous.
"""
in_dtype = "npy_" + node.inputs[0].dtype
out_dtype = "npy_" + node.outputs[0].dtype
if getattr(self.scalar_op, 'identity', None) == 0:
......@@ -2666,8 +2666,6 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
"""
CAReduce that reuses the Python code from gpuarray.
Too slow for now, as it only has a Python interface.
"""
def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None):
if not hasattr(scalar_op, 'identity'):
......
......@@ -17,6 +17,10 @@ from .type import GpuArrayType
class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
"""
Images2Neibs for the GPU.
"""
def __init__(self, mode='valid'):
if mode not in ['valid', 'ignore_borders', 'wrap_centered']:
raise NotImplementedError("Only the mode valid, ignore_borders"
......
......@@ -41,6 +41,9 @@ def ensure_float(val, name):
class Gemm16(COp):
"""
Gemm for float16 using the Nervana kernels.
"""
__props__ = ('relu', 'inplace')
_f16_ok = True
params_type = gpu_context_type
......
......@@ -24,6 +24,9 @@ from .elemwise import GpuElemwise
class GpuSubtensor(HideC, Subtensor):
"""
Subtensor on the GPU.
"""
_f16_ok = True
def make_node(self, x, *inputs):
......@@ -173,8 +176,8 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
The optimization to make this inplace is in tensor/opt.
The same optimization handles IncSubtensor and GpuIncSubtensor.
This Op has c_code too; it inherits tensor.IncSubtensor's c_code.
The helper methods like do_type_checking, copy_of_x, etc. specialize
the c_code for this Op.
The helper methods like :meth:`do_type_checking`,
:meth:`copy_of_x`, etc. specialize the c_code for this Op.
"""
......@@ -405,6 +408,9 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
"""
AdvancedSubtensor1 on the GPU.
"""
def make_node(self, x, ilist):
ctx_name = infer_context_name(x, ilist)
x_ = as_gpuarray_variable(x, ctx_name)
......@@ -580,8 +586,10 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
_f16_ok = True
def make_node(self, x, y, ilist):
"""It defer from GpuAdvancedIncSubtensor1 in that it make sure
the index are of type long.
"""
It differs from GpuAdvancedIncSubtensor1 in that it makes sure
the indexes are of type long.
"""
ctx_name = infer_context_name(x, y, ilist)
x_ = as_gpuarray_variable(x, ctx_name)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论