提交 34f1dd14 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add a scheduled call wrapper to do the scheduling and the call in one swoop.

上级 edd1c456
...@@ -173,11 +173,15 @@ class Kernel(object): ...@@ -173,11 +173,15 @@ class Kernel(object):
fname: str fname: str
the name of the function wrapper. the name of the function wrapper.
(defaults to name + `_call`) (defaults to name + `_call`)
sname: str
the name of the scheduled call function
(defaults to name + `_scall`)
""" """
def __init__(self, code, params, name, flags, def __init__(self, code, params, name, flags,
codevar=None, binvar=None, objvar=None, fname=None): codevar=None, binvar=None, objvar=None, fname=None,
sname=None):
self.code = code self.code = code
self.params = params self.params = params
self.name = name self.name = name
...@@ -194,6 +198,9 @@ class Kernel(object): ...@@ -194,6 +198,9 @@ class Kernel(object):
if fname is None: if fname is None:
fname = name + '_call' fname = name + '_call'
self.fname = fname self.fname = fname
if sname is None:
sname = name + '_scall'
self.sname = sname
@staticmethod @staticmethod
def get_flags(*types): def get_flags(*types):
...@@ -338,22 +345,30 @@ class GpuKernelBase(object): ...@@ -338,22 +345,30 @@ class GpuKernelBase(object):
setargs = '\n '.join(setargs) setargs = '\n '.join(setargs)
return """ return """
int {fname}(unsigned int nd, size_t *gdim, size_t *ldim, size_t shared, int {fname}(unsigned int _nd, size_t *_gdim, size_t *_ldim, size_t _shared,
{args}) {{ {args}) {{
{setargs} {setargs}
return GpuKernel_call(&{kname}, nd, ldim, gdim, shared, NULL); return GpuKernel_call(&{kname}, _nd, _ldim, _gdim, _shared, NULL);
}} }}
""".format(args=args, fname=k.fname, setargs=setargs, kname=k.objvar)
def c_support_code(self): int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{
return """ size_t _ls = 0;
template <typename T> size_t _gs = 0;
static T ceil_intdiv(T a, T b) int _err;
{
return (a/b) + ((a % b) ? 1: 0); if (_nd != 1) return GA_UNSUPPORTED_ERROR;
}
""" _err = GpuKernel_sched(&{kname}, _n[0], &_ls, &_gs);
if (_err != GA_NO_ERROR)
return _err;
{setargs}
return GpuKernel_call(&{kname}, 1, &_ls, &_gs, _shared, NULL);
}}
""".format(args=args, fname=k.fname, setargs=setargs, sname=k.sname,
kname=k.objvar)
def c_support_code_apply(self, node, name): def c_support_code_apply(self, node, name):
kernels = self.gpu_kernels(node, name) kernels = self.gpu_kernels(node, name)
...@@ -428,7 +443,7 @@ int {fname}(unsigned int nd, size_t *gdim, size_t *ldim, size_t shared, ...@@ -428,7 +443,7 @@ int {fname}(unsigned int nd, size_t *gdim, size_t *ldim, size_t shared,
The node that we need the cache version for. The node that we need the cache version for.
""" """
return (6, self.get_params(node).bin_id) return (7, self.get_params(node).bin_id)
def forward_string_meth(name): def forward_string_meth(name):
...@@ -466,12 +481,14 @@ class CGpuKernelBase(COp, GpuKernelBase): ...@@ -466,12 +481,14 @@ class CGpuKernelBase(COp, GpuKernelBase):
kernel_re = re.compile(r'^#kernel ([a-zA-Z_].*?)$', re.MULTILINE) kernel_re = re.compile(r'^#kernel ([a-zA-Z_].*?)$', re.MULTILINE)
c_support_code = forward_string_meth('c_support_code')
c_support_code_apply = forward_string_meth('c_support_code_apply') c_support_code_apply = forward_string_meth('c_support_code_apply')
c_support_code_struct = forward_string_meth('c_support_code_struct') c_support_code_struct = forward_string_meth('c_support_code_struct')
c_init_code_struct = forward_string_meth('c_init_code_struct') c_init_code_struct = forward_string_meth('c_init_code_struct')
c_cleanup_code_struct = forward_string_meth('c_cleanup_code_struct') c_cleanup_code_struct = forward_string_meth('c_cleanup_code_struct')
def c_code_cache_version_apply(self, node):
return GpuKernelBase.c_code_cache_version_apply(self, node)
def _type_macros(self, node): def _type_macros(self, node):
define_template = "#define %s %s\n" define_template = "#define %s %s\n"
undef_template = "#undef %s\n" undef_template = "#undef %s\n"
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论