提交 3bf6f4cb authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add a listing of ops for gpuarray.

上级 f005cc21
...@@ -13,4 +13,5 @@ ...@@ -13,4 +13,5 @@
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
op
type type
.. _libdoc_gpuarray_op:
================================
List of gpuarray Ops implemented
================================
.. moduleauthor:: LISA
Normally you should not call those Ops directly! Theano should
automatically transform cpu ops to their gpu equivalent. So this list
is just useful to let people know what is implemented on the gpu.
Basic Op
========
.. automodule:: theano.sandbox.gpuarray.basic_ops
:members:
Blas Op
=======
.. automodule:: theano.sandbox.gpuarray.blas
:members:
.. automodule:: theano.sandbox.gpuarray.nerv
:members:
Elemwise Op
===========
.. automodule:: theano.sandbox.gpuarray.elemwise
:members:
Subtensor Op
============
.. automodule:: theano.sandbox.gpuarray.subtensor
:members:
Nnet Op
=======
.. automodule:: theano.sandbox.gpuarray.nnet
:members:
.. automodule:: theano.sandbox.gpuarray.neighbours
:members:
...@@ -27,6 +27,20 @@ from .fp16_help import write_w ...@@ -27,6 +27,20 @@ from .fp16_help import write_w
def as_gpuarray_variable(x, context_name): def as_gpuarray_variable(x, context_name):
"""
This will attempt to convert `x` into a variable on the GPU.
It can take either a value or another variable. If `x` is already
suitable, it will be returned as-is.
Parameters
----------
x
Object to convert
context_name : str or None
target context name for the result
"""
# If this is already some form of variable, try to avoid an extra transfer # If this is already some form of variable, try to avoid an extra transfer
if isinstance(x, Variable): if isinstance(x, Variable):
while True: while True:
...@@ -174,6 +188,13 @@ class Kernel(object): ...@@ -174,6 +188,13 @@ class Kernel(object):
class GpuKernelBase(object): class GpuKernelBase(object):
"""
Base class for operations that need to compile kernels.
It is not mandatory to use this class, but it helps with a lot of
the small things that you have to pay attention to.
"""
params_type = gpu_context_type params_type = gpu_context_type
def gpu_kernels(self, node, name): def gpu_kernels(self, node, name):
...@@ -274,10 +295,25 @@ class GpuKernelBase(object): ...@@ -274,10 +295,25 @@ class GpuKernelBase(object):
return (self.c_code_cache_version(), self.kernel_version(node)) return (self.c_code_cache_version(), self.kernel_version(node))
def kernel_version(self, node): def kernel_version(self, node):
"""
If you override :meth:`c_code_cache_version_apply`, call this
method to have the version of the kernel support code and
device.
Parameters
----------
node : apply node
The node that we need the cache version for.
"""
return (3, self.get_params(node).bin_id) return (3, self.get_params(node).bin_id)
class HostFromGpu(Op): class HostFromGpu(Op):
"""
Transfer data to CPU.
"""
__props__ = () __props__ = ()
_f16_ok = True _f16_ok = True
...@@ -356,6 +392,10 @@ host_from_gpu = HostFromGpu() ...@@ -356,6 +392,10 @@ host_from_gpu = HostFromGpu()
class GpuFromHost(Op): class GpuFromHost(Op):
"""
Transfer data to GPU.
"""
__props__ = ('context_name',) __props__ = ('context_name',)
_f16_ok = True _f16_ok = True
params_type = gpu_context_type params_type = gpu_context_type
...@@ -443,6 +483,10 @@ class GpuFromHost(Op): ...@@ -443,6 +483,10 @@ class GpuFromHost(Op):
class GpuToGpu(Op): class GpuToGpu(Op):
"""
Transfer data between GPUs.
"""
__props__ = ('context_name',) __props__ = ('context_name',)
_f16_ok = True _f16_ok = True
params_type = gpu_context_type params_type = gpu_context_type
...@@ -494,6 +538,7 @@ class GpuToGpu(Op): ...@@ -494,6 +538,7 @@ class GpuToGpu(Op):
class GpuAlloc(HideC, Alloc): class GpuAlloc(HideC, Alloc):
""" """
Allocate initialized memory on the GPU.
Parameters Parameters
---------- ----------
...@@ -654,6 +699,10 @@ class GpuAlloc(HideC, Alloc): ...@@ -654,6 +699,10 @@ class GpuAlloc(HideC, Alloc):
class GpuAllocEmpty(HideC, Alloc): class GpuAllocEmpty(HideC, Alloc):
"""
Allocate uninitialized memory on the GPU.
"""
__props__ = ('dtype', 'context_name') __props__ = ('dtype', 'context_name')
_f16_ok = True _f16_ok = True
params_type = gpu_context_type params_type = gpu_context_type
...@@ -732,8 +781,10 @@ def empty_like(var): ...@@ -732,8 +781,10 @@ def empty_like(var):
class GpuContiguous(Op): class GpuContiguous(Op):
""" """
Always return a c contiguous output. Copy the input only if it is Return a C contiguous version of the input.
not already c contiguous.
This may either pass the object as-is (if already C contiguous) or
make a copy.
""" """
__props__ = () __props__ = ()
...@@ -793,7 +844,7 @@ gpu_contiguous = GpuContiguous() ...@@ -793,7 +844,7 @@ gpu_contiguous = GpuContiguous()
class GpuReshape(HideC, tensor.Reshape): class GpuReshape(HideC, tensor.Reshape):
""" """
Implement Reshape on the gpu. Reshape for GPU variables.
""" """
...@@ -914,6 +965,10 @@ class GpuReshape(HideC, tensor.Reshape): ...@@ -914,6 +965,10 @@ class GpuReshape(HideC, tensor.Reshape):
class GpuJoin(HideC, Join): class GpuJoin(HideC, Join):
"""
Join for GPU.
"""
_f16_ok = True _f16_ok = True
params_type = gpu_context_type params_type = gpu_context_type
...@@ -991,6 +1046,10 @@ gpu_join = GpuJoin() ...@@ -991,6 +1046,10 @@ gpu_join = GpuJoin()
class GpuSplit(HideC, Split): class GpuSplit(HideC, Split):
"""
Split for GPU.
"""
def make_node(self, x, axis, splits): def make_node(self, x, axis, splits):
node = Split.make_node(self, x, axis, splits) node = Split.make_node(self, x, axis, splits)
x = as_gpuarray_variable(x, infer_context_name(x)) x = as_gpuarray_variable(x, infer_context_name(x))
...@@ -1002,6 +1061,10 @@ class GpuSplit(HideC, Split): ...@@ -1002,6 +1061,10 @@ class GpuSplit(HideC, Split):
class GpuEye(GpuKernelBase, Op): class GpuEye(GpuKernelBase, Op):
"""
Eye for GPU.
"""
__props__ = ('dtype', 'context_name') __props__ = ('dtype', 'context_name')
_f16_ok = True _f16_ok = True
......
...@@ -31,6 +31,10 @@ class BlasOp(Op): ...@@ -31,6 +31,10 @@ class BlasOp(Op):
class GpuGemv(BlasOp): class GpuGemv(BlasOp):
"""
Gemv on the GPU.
"""
__props__ = ('inplace',) __props__ = ('inplace',)
def __init__(self, inplace=False): def __init__(self, inplace=False):
...@@ -107,6 +111,10 @@ gpugemv_inplace = GpuGemv(inplace=True) ...@@ -107,6 +111,10 @@ gpugemv_inplace = GpuGemv(inplace=True)
class GpuGemm(BlasOp): class GpuGemm(BlasOp):
"""
Gemm on the GPU.
"""
__props__ = ('inplace',) __props__ = ('inplace',)
_f16_ok = True _f16_ok = True
...@@ -184,6 +192,10 @@ gpugemm_inplace = GpuGemm(inplace=True) ...@@ -184,6 +192,10 @@ gpugemm_inplace = GpuGemm(inplace=True)
class GpuGer(BlasOp): class GpuGer(BlasOp):
"""
Ger on the GPU.
"""
__props__ = ('inplace',) __props__ = ('inplace',)
def __init__(self, inplace=False): def __init__(self, inplace=False):
...@@ -256,6 +268,10 @@ gpuger_inplace = GpuGer(inplace=True) ...@@ -256,6 +268,10 @@ gpuger_inplace = GpuGer(inplace=True)
class GpuDot22(BlasOp): class GpuDot22(BlasOp):
"""
Dot22 on the GPU.
"""
__props__ = () __props__ = ()
def make_node(self, x, y): def make_node(self, x, y):
......
...@@ -57,6 +57,10 @@ def as_C_string_const(s): ...@@ -57,6 +57,10 @@ def as_C_string_const(s):
class GpuElemwise(GpuKernelBase, HideC, Elemwise): class GpuElemwise(GpuKernelBase, HideC, Elemwise):
"""
Elemwise on the GPU.
"""
nin = property(lambda self: self.scalar_op.nin) nin = property(lambda self: self.scalar_op.nin)
nout = property(lambda self: self.scalar_op.nout) nout = property(lambda self: self.scalar_op.nout)
_f16_ok = True _f16_ok = True
...@@ -445,6 +449,10 @@ class SupportCodeError(Exception): ...@@ -445,6 +449,10 @@ class SupportCodeError(Exception):
class GpuDimShuffle(HideC, DimShuffle): class GpuDimShuffle(HideC, DimShuffle):
"""
DimShuffle on the GPU.
"""
_f16_ok = True _f16_ok = True
def make_node(self, input): def make_node(self, input):
...@@ -548,7 +556,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -548,7 +556,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
Parameters Parameters
---------- ----------
reduce-mask reduce_mask
The dimensions along which to reduce. The `reduce_mask` is a tuple of The dimensions along which to reduce. The `reduce_mask` is a tuple of
booleans (actually integers 0 or 1) that specify for each input booleans (actually integers 0 or 1) that specify for each input
dimension, whether to reduce it (1) or not (0). dimension, whether to reduce it (1) or not (0).
...@@ -1279,14 +1287,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1279,14 +1287,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
""" % locals() """ % locals()
def c_code_reduce_ccontig(self, sio, node, name, x, z, fail): def c_code_reduce_ccontig(self, sio, node, name, x, z, fail):
"""
WRITEME
IG: I believe, based on how this is called in c_code, that it
is for the case where we are reducing on all axes and x is
C contiguous.
"""
in_dtype = "npy_" + node.inputs[0].dtype in_dtype = "npy_" + node.inputs[0].dtype
out_dtype = "npy_" + node.outputs[0].dtype out_dtype = "npy_" + node.outputs[0].dtype
if getattr(self.scalar_op, 'identity', None) == 0: if getattr(self.scalar_op, 'identity', None) == 0:
...@@ -2666,8 +2666,6 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2666,8 +2666,6 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
""" """
CAReduce that reuses the python code from gpuarray. CAReduce that reuses the python code from gpuarray.
Too slow for now as it only have a python interface.
""" """
def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None): def __init__(self, scalar_op, axis=None, dtype=None, acc_dtype=None):
if not hasattr(scalar_op, 'identity'): if not hasattr(scalar_op, 'identity'):
......
...@@ -17,6 +17,10 @@ from .type import GpuArrayType ...@@ -17,6 +17,10 @@ from .type import GpuArrayType
class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
"""
Images2Neibs for the GPU.
"""
def __init__(self, mode='valid'): def __init__(self, mode='valid'):
if mode not in ['valid', 'ignore_borders', 'wrap_centered']: if mode not in ['valid', 'ignore_borders', 'wrap_centered']:
raise NotImplementedError("Only the mode valid, ignore_borders" raise NotImplementedError("Only the mode valid, ignore_borders"
......
...@@ -41,6 +41,9 @@ def ensure_float(val, name): ...@@ -41,6 +41,9 @@ def ensure_float(val, name):
class Gemm16(COp): class Gemm16(COp):
"""
Gemm for float16 using the Nervana kernels.
"""
__props__ = ('relu', 'inplace') __props__ = ('relu', 'inplace')
_f16_ok = True _f16_ok = True
params_type = gpu_context_type params_type = gpu_context_type
......
...@@ -24,6 +24,9 @@ from .elemwise import GpuElemwise ...@@ -24,6 +24,9 @@ from .elemwise import GpuElemwise
class GpuSubtensor(HideC, Subtensor): class GpuSubtensor(HideC, Subtensor):
"""
Subtensor on the GPU.
"""
_f16_ok = True _f16_ok = True
def make_node(self, x, *inputs): def make_node(self, x, *inputs):
...@@ -173,8 +176,8 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor): ...@@ -173,8 +176,8 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
The optimization to make this inplace is in tensor/opt. The optimization to make this inplace is in tensor/opt.
The same optimization handles IncSubtensor and GpuIncSubtensor. The same optimization handles IncSubtensor and GpuIncSubtensor.
This Op has c_code too; it inherits tensor.IncSubtensor's c_code. This Op has c_code too; it inherits tensor.IncSubtensor's c_code.
The helper methods like do_type_checking, copy_of_x, etc. specialize The helper methods like :meth:`do_type_checking`,
the c_code for this Op. :meth:`copy_of_x`, etc. specialize the c_code for this Op.
""" """
...@@ -405,6 +408,9 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor): ...@@ -405,6 +408,9 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1): class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
"""
AdvancedSubtensor1 on the GPU.
"""
def make_node(self, x, ilist): def make_node(self, x, ilist):
ctx_name = infer_context_name(x, ilist) ctx_name = infer_context_name(x, ilist)
x_ = as_gpuarray_variable(x, ctx_name) x_ = as_gpuarray_variable(x, ctx_name)
...@@ -580,8 +586,10 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1): ...@@ -580,8 +586,10 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
_f16_ok = True _f16_ok = True
def make_node(self, x, y, ilist): def make_node(self, x, y, ilist):
"""It defer from GpuAdvancedIncSubtensor1 in that it make sure """
the index are of type long. It differs from GpuAdvancedIncSubtensor1 in that it makes sure
the indexes are of type long.
""" """
ctx_name = infer_context_name(x, y, ilist) ctx_name = infer_context_name(x, y, ilist)
x_ = as_gpuarray_variable(x, ctx_name) x_ = as_gpuarray_variable(x, ctx_name)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论