提交 2c49663e 作者: Arnaud Bergeron

Fix a bunch of problems that I somehow managed to miss when testing the previous commit.

上级 a8b3b329
...@@ -120,7 +120,7 @@ class Kernel(object): ...@@ -120,7 +120,7 @@ class Kernel(object):
if t == gpuarray.GpuArray: if t == gpuarray.GpuArray:
return "GA_BUFFER" return "GA_BUFFER"
else: else:
return gpuarray.dtype_to_typecode(t) return str(gpuarray.dtype_to_typecode(t))
return ', '.join(m(t) for t in self.params) return ', '.join(m(t) for t in self.params)
...@@ -141,9 +141,9 @@ class GpuKernelBase(object): ...@@ -141,9 +141,9 @@ class GpuKernelBase(object):
return o + ['gpuarray/types.h'] return o + ['gpuarray/types.h']
def _generate_kernel_bin(self, k): def _generate_kernel_bin(self, k):
k = gpuarray.GpuKernel(k.code, k.name, k.params, **k.flags) gk = gpuarray.GpuKernel(k.code, k.name, k.params, **k.flags)
bin = k._binary bin = gk._binary
bocde = ','.join(hex(ord(c)) for c in bin) bcode = ','.join(hex(ord(c)) for c in bin)
return ("""static const char %(bname)s[] = { %(bcode)s };""" % return ("""static const char %(bname)s[] = { %(bcode)s };""" %
dict(bname=k.binvar, bcode=bcode)) dict(bname=k.binvar, bcode=bcode))
...@@ -154,7 +154,7 @@ class GpuKernelBase(object): ...@@ -154,7 +154,7 @@ class GpuKernelBase(object):
dict(cname=k.codevar, code=code)) dict(cname=k.codevar, code=code))
def _generate_kernel_vars(self, k): def _generate_kernel_vars(self, k):
return """static GpuKernel %(kname)s;""" % dict(k.objname) return """static GpuKernel %(kname)s;""" % dict(kname=k.objvar)
def c_support_code_apply(self, node, name): def c_support_code_apply(self, node, name):
kernels = self.gpu_kernels(node, name) kernels = self.gpu_kernels(node, name)
...@@ -172,14 +172,14 @@ class GpuKernelBase(object): ...@@ -172,14 +172,14 @@ class GpuKernelBase(object):
int types[%(numargs)u] = {%(types)s}; int types[%(numargs)u] = {%(types)s};
const char *bcode = %(bvar)s; const char *bcode = %(bvar)s;
size_t sz = sizeof(%(bvar)s); size_t sz = sizeof(%(bvar)s);
GpuContext *c = pygpu_default_context(); PyGpuContextObject *c = pygpu_default_context();
if (GpuKernel_init(%(ovar)s, c->ops, c->ctx, 1, &bcode, &sz, "%(kname)s", if (GpuKernel_init(&%(ovar)s, c->ops, c->ctx, 1, &bcode, &sz, "%(kname)s",
%(numargs)u, types, GA_USE_BINARY) != GA_NO_ERROR) { %(numargs)u, types, GA_USE_BINARY) != GA_NO_ERROR) {
if ((%(err)s = GpuKernel_init(%(ovar)s, c->ops, c->ctx, 1, &%(cname)s, if ((%(err)s = GpuKernel_init(&%(ovar)s, c->ops, c->ctx, 1, &%(cname)s,
NULL, "%(kname)s", %(numargs)u, types, NULL, "%(kname)s", %(numargs)u, types,
%(flags)s)) != GA_NO_ERROR) { %(flags)s)) != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuKernel_init error %%d: %%s", PyErr_Format(PyExc_RuntimeError, "GpuKernel_init error %%d: %%s",
err, Gpu_error(c->ops, c->ctx, err)); %(err)s, Gpu_error(c->ops, c->ctx, %(err)s));
return %(error_out)s; return %(error_out)s;
} }
} }
...@@ -823,8 +823,8 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) { ...@@ -823,8 +823,8 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) {
return [Kernel( return [Kernel(
code=code, name="k", code=code, name="k",
params=[gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SIZE], params=[gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SIZE],
flags=Kernel.get_flags(self.dtype) flags=Kernel.get_flags(self.dtype),
objname='k_eye_'+name, objvar='k_eye_'+name,
)] )]
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
......
...@@ -2395,7 +2395,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2395,7 +2395,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
flags=Kernel.get_flags(node.inputs[0].type.dtype, flags=Kernel.get_flags(node.inputs[0].type.dtype,
acc_dtype, acc_dtype,
node.outputs[0].type.dtype), node.outputs[0].type.dtype),
objname='k_reduk_'+name)] objvar='k_reduk_'+name)]
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
if not any(getattr(self, 'redux', [node.inputs[0].ndim != 0])): if not any(getattr(self, 'redux', [node.inputs[0].ndim != 0])):
......
...@@ -868,7 +868,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -868,7 +868,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
params=[gpuarray.GpuArray, gpuarray.GpuArray, params=[gpuarray.GpuArray, gpuarray.GpuArray,
'uint32', 'uint32'], 'uint32', 'uint32'],
flags=Kernel.get_flags(self.output_type.dtype, 'int32'), flags=Kernel.get_flags(self.output_type.dtype, 'int32'),
objname='k_mrg_uniform')] objvar='k_mrg_uniform')]
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
rstate, size = inp rstate, size = inp
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论