提交 3f6653ec authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add a wrapper function for kernels to simplify calling.

This comes with tests and docs changes. This also updates GpuEye to use it so that we have a real-world example.
上级 d79b0fed
...@@ -152,30 +152,24 @@ go this way, then you can look up the C API for kernels in ...@@ -152,30 +152,24 @@ go this way, then you can look up the C API for kernels in
libgpuarray. libgpuarray.
In any case you will need to call your compiled kernel with some data, In any case you will need to call your compiled kernel with some data,
in most cases in your :meth:`c_code` method. This is done using the in most cases in your :meth:`c_code` method. This is done by using
`GpuKernel_call() the provided wrapper function. An example calling the above kernel
<http://deeplearning.net/software/libgpuarray/c_api.html#GpuKernel_call>`_ would be::
function in your C code. An example calling the above kernel would
be::
size_t ls, gs; size_t ls, gs;
size_t dims[2]; size_t dims[2];
void *args[3];
// ... // ...
args[0] = input->ga.data;
args[1] = &dims[0];
args[2] = &dims[1];
ls = 1; ls = 1;
gs = 256; gs = 256;
err = GpuKernel_call(&k_k, 1, &ls, &gs, 0, args); err = k_call(1, &ls, &gs, 0, input->ga.data, dims[0], dims[1]);
// ... // ...
The name of the kernel object depends on the name you passed to The name of the wrapper function depends on the name you passed to
``Kernel()`` when you declared it (or the name in your `#kernel` ``Kernel()`` when you declared it (or the name in your `#kernel`
statement). It defaults to `'k_' + name`. statement). It defaults to `name + '_call'`.
For other operations in the C code you should refer to the For other operations in the C code you should refer to the
`libgpuarray documentation `libgpuarray documentation
......
...@@ -169,11 +169,14 @@ class Kernel(object): ...@@ -169,11 +169,14 @@ class Kernel(object):
objvar: str objvar: str
the name of the variable for the kernel object. the name of the variable for the kernel object.
(defaults to `k_` + name) (defaults to `k_` + name)
fname: str
the name of the function wrapper.
(defaults to name + `_call`)
""" """
def __init__(self, code, params, name, flags, def __init__(self, code, params, name, flags,
codevar=None, binvar=None, objvar=None): codevar=None, binvar=None, objvar=None, fname=None):
self.code = code self.code = code
self.params = params self.params = params
self.name = name self.name = name
...@@ -187,6 +190,9 @@ class Kernel(object): ...@@ -187,6 +190,9 @@ class Kernel(object):
if objvar is None: if objvar is None:
objvar = 'k_' + name objvar = 'k_' + name
self.objvar = objvar self.objvar = objvar
if fname is None:
fname = name + '_call'
self.fname = fname
@staticmethod @staticmethod
def get_flags(*types): def get_flags(*types):
...@@ -253,6 +259,17 @@ class Kernel(object): ...@@ -253,6 +259,17 @@ class Kernel(object):
return ', '.join(m(t) for t in self.params) return ', '.join(m(t) for t in self.params)
def get_ctype(dtype):
    """
    Return the C type name used to declare a kernel argument.

    Parameters
    ----------
    dtype
        One entry of a kernel parameter list: `gpuarray.GpuArray`,
        `gpuarray.SIZE`, `gpuarray.SSIZE`, or any object exposing a
        `name` attribute (presumably a numpy dtype -- TODO confirm
        against callers).

    Returns
    -------
    str
        The C type spelling for that parameter.

    """
    # GpuArray is matched by identity, the size markers by equality.
    if dtype is gpuarray.GpuArray:
        ctype = "gpudata *"
    elif dtype == gpuarray.SIZE:
        ctype = "size_t"
    elif dtype == gpuarray.SSIZE:
        ctype = "ssize_t"
    else:
        # Everything else is spelled `<name>_t`.
        ctype = dtype.name + '_t'
    return ctype
class GpuKernelBase(object): class GpuKernelBase(object):
""" """
Base class for operations that need to compile kernels. Base class for operations that need to compile kernels.
...@@ -295,6 +312,29 @@ class GpuKernelBase(object): ...@@ -295,6 +312,29 @@ class GpuKernelBase(object):
def _generate_kernel_vars(self, k): def _generate_kernel_vars(self, k):
return """GpuKernel %(kname)s;""" % dict(kname=k.objvar) return """GpuKernel %(kname)s;""" % dict(kname=k.objvar)
def _generate_kernel_wrap(self, k):
    """
    Generate the C source of the calling wrapper for kernel `k`.

    The wrapper is a C function named `k.fname` that takes the launch
    configuration (`nd`, `ldim`, `gdim`, `shared`) followed by one
    typed argument per entry of `k.params`. It binds every argument
    with `GpuKernel_setarg()` and then launches `k.objvar` through
    `GpuKernel_call()`.

    Parameters
    ----------
    k
        The kernel description; `params`, `fname` and `objvar` are
        read from it.

    Returns
    -------
    str
        C code defining the wrapper. The wrapper returns the first
        error code reported while binding arguments, or the result
        of `GpuKernel_call()` otherwise.

    """
    # Fixed launch parameters come first; kernel arguments follow.
    # Building one list avoids a dangling comma when `k.params` is
    # empty.
    params = ["unsigned int nd", "size_t *ldim", "size_t *gdim",
              "size_t shared"]
    setargs = []
    for i, p in enumerate(k.params):
        params.append("{} arg{}".format(get_ctype(p), i))
        # Buffers are handed over as-is, every other argument by address.
        if p is gpuarray.GpuArray:
            value = "arg{}".format(i)
        else:
            value = "&arg{}".format(i)
        # Stop at the first failed bind instead of launching the kernel
        # with a stale or missing argument.
        setargs.append(
            "if ((err = GpuKernel_setarg(&{0}, {1}, {2})) != GA_NO_ERROR) "
            "return err;".format(k.objvar, i, value))

    body = []
    if setargs:
        # Only declare `err` when it is used, to keep the C warning-free.
        body.append("int err;")
        body.extend(setargs)
    body.append(
        "return GpuKernel_call(&{0}, nd, ldim, gdim, shared, NULL);".format(
            k.objvar))

    return """
int {fname}({params}) {{
  {body}
}}
""".format(fname=k.fname, params=', '.join(params),
           body='\n  '.join(body))
def c_support_code(self): def c_support_code(self):
return """ return """
template <typename T> template <typename T>
...@@ -313,7 +353,9 @@ class GpuKernelBase(object): ...@@ -313,7 +353,9 @@ class GpuKernelBase(object):
def c_support_code_struct(self, node, name): def c_support_code_struct(self, node, name):
kernels = self.gpu_kernels(node, name) kernels = self.gpu_kernels(node, name)
return '\n'.join(self._generate_kernel_vars(k) for k in kernels) kvars = '\n'.join(self._generate_kernel_vars(k) for k in kernels)
wrappers = '\n'.join(self._generate_kernel_wrap(k) for k in kernels)
return kvars + '\n' + wrappers
def _generate_zeros(self, k): def _generate_zeros(self, k):
return """memset(&%(v)s, 0, sizeof(%(v)s));""" % dict(v=k.objvar) return """memset(&%(v)s, 0, sizeof(%(v)s));""" % dict(v=k.objvar)
...@@ -375,7 +417,7 @@ class GpuKernelBase(object): ...@@ -375,7 +417,7 @@ class GpuKernelBase(object):
The node that we need the cache version for. The node that we need the cache version for.
""" """
return (4, self.get_params(node).bin_id) return (5, self.get_params(node).bin_id)
def forward_string_meth(name): def forward_string_meth(name):
...@@ -1309,7 +1351,7 @@ class GpuEye(GpuKernelBase, Op): ...@@ -1309,7 +1351,7 @@ class GpuEye(GpuKernelBase, Op):
def gpu_kernels(self, node, name): def gpu_kernels(self, node, name):
code = """ code = """
KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) { KERNEL void eye(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) {
ga_size nb = n < m ? n : m; ga_size nb = n < m ? n : m;
for (ga_size i = LID_0; i < nb; i += LDIM_0) { for (ga_size i = LID_0; i < nb; i += LDIM_0) {
a[i*m + i] = %(write_a)s(1); a[i*m + i] = %(write_a)s(1);
...@@ -1317,7 +1359,7 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) { ...@@ -1317,7 +1359,7 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) {
}""" % dict(ctype=pygpu.gpuarray.dtype_to_ctype(self.dtype), }""" % dict(ctype=pygpu.gpuarray.dtype_to_ctype(self.dtype),
name=name, write_a=write_w(self.dtype)) name=name, write_a=write_w(self.dtype))
return [Kernel( return [Kernel(
code=code, name="k", code=code, name="eye",
params=[gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SIZE], params=[gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SIZE],
flags=Kernel.get_flags(self.dtype), flags=Kernel.get_flags(self.dtype),
objvar='k_eye_' + name)] objvar='k_eye_' + name)]
...@@ -1333,7 +1375,6 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) { ...@@ -1333,7 +1375,6 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) {
s = """ s = """
size_t dims[2] = {0, 0}; size_t dims[2] = {0, 0};
size_t ls, gs; size_t ls, gs;
void *args[3];
int err; int err;
dims[0] = ((dtype_%(n)s*)PyArray_DATA(%(n)s))[0]; dims[0] = ((dtype_%(n)s*)PyArray_DATA(%(n)s))[0];
...@@ -1348,12 +1389,9 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) { ...@@ -1348,12 +1389,9 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) {
%(fail)s %(fail)s
} }
args[0] = %(z)s->ga.data;
args[1] = &dims[0];
args[2] = &dims[1];
ls = 1; ls = 1;
gs = 256; gs = 256;
err = GpuKernel_call(&%(kname)s, 1, &ls, &gs, 0, args); err = eye_call(1, &ls, &gs, 0, %(z)s->ga.data, dims[0], dims[1]);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"gpuarray error: kEye: %%s. n%%lu, m=%%lu.", "gpuarray error: kEye: %%s. n%%lu, m=%%lu.",
...@@ -1369,4 +1407,4 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) { ...@@ -1369,4 +1407,4 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) {
return s return s
def c_code_cache_version(self): def c_code_cache_version(self):
return (5,) return (6,)
...@@ -34,13 +34,10 @@ int APPLY_SPECIFIC(tstgpueye)(PyArrayObject *n, PyArrayObject *m, ...@@ -34,13 +34,10 @@ int APPLY_SPECIFIC(tstgpueye)(PyArrayObject *n, PyArrayObject *m,
if (*z == NULL) if (*z == NULL)
return -1; return -1;
args[0] = (*z)->ga.data;
args[1] = &dims[0];
args[2] = &dims[1];
ls = 1; ls = 1;
gs = 256; gs = 256;
/* The k_eye name comes from the kernel declaration above. */ /* The eye_call name comes from the kernel declaration above. */
err = GpuKernel_call(&k_eye, 1, &ls, &gs, 0, args); err = eye_call(1, &ls, &gs, 0, (*z)->ga.data, dims[0], dims[1]);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"gpuarray error: kEye: %s. n%lu, m=%lu.", "gpuarray error: kEye: %s. n%lu, m=%lu.",
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论