提交 32a13f0e 作者: Arnaud Bergeron

Fixes found from test_basic_ops.py

上级 dd51383f
...@@ -54,7 +54,7 @@ if pygpu: ...@@ -54,7 +54,7 @@ if pygpu:
from .basic_ops import (GpuAlloc, GpuContiguous, GpuEye, GpuFromHost, from .basic_ops import (GpuAlloc, GpuContiguous, GpuEye, GpuFromHost,
GpuJoin, GpuReshape, GpuSplit, HostFromGpu) GpuJoin, GpuReshape, GpuSplit, HostFromGpu)
from .basic_ops import host_from_gpu, gpu_from_host from .basic_ops import host_from_gpu, GpuFromHost
from .elemwise import GpuElemwise from .elemwise import GpuElemwise
from .subtensor import (GpuSubtensor, GpuIncSubtensor, from .subtensor import (GpuSubtensor, GpuIncSubtensor,
GpuAdvancedIncSubtensor1) GpuAdvancedIncSubtensor1)
......
...@@ -181,7 +181,7 @@ class GpuKernelBase(object): ...@@ -181,7 +181,7 @@ class GpuKernelBase(object):
dict(cname=k.codevar, code=code)) dict(cname=k.codevar, code=code))
def _generate_kernel_vars(self, k): def _generate_kernel_vars(self, k):
return """static GpuKernel %(kname)s;""" % dict(kname=k.objvar) return """GpuKernel %(kname)s;""" % dict(kname=k.objvar)
def c_support_code(self): def c_support_code(self):
return """ return """
...@@ -215,11 +215,11 @@ class GpuKernelBase(object): ...@@ -215,11 +215,11 @@ class GpuKernelBase(object):
if (GpuKernel_init(&%(ovar)s, %(ctx)s->ops, %(ctx)s->ctx, 1, &bcode, &sz, if (GpuKernel_init(&%(ovar)s, %(ctx)s->ops, %(ctx)s->ctx, 1, &bcode, &sz,
"%(kname)s", %(numargs)u, types, GA_USE_BINARY, NULL) "%(kname)s", %(numargs)u, types, GA_USE_BINARY, NULL)
!= GA_NO_ERROR) { != GA_NO_ERROR) {
if ((%(err)s = GpuKernel_init(&%(ovar)s, %(ctx)s->ops, %(ctx)s->ctx, 1, if ((err = GpuKernel_init(&%(ovar)s, %(ctx)s->ops, %(ctx)s->ctx, 1,
&%(cname)s, NULL, "%(kname)s", %(numargs)u, &%(cname)s, NULL, "%(kname)s", %(numargs)u,
types, %(flags)s, NULL)) != GA_NO_ERROR) { types, %(flags)s, NULL)) != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuKernel_init error %%d: %%s", PyErr_Format(PyExc_RuntimeError, "GpuKernel_init error %%d: %%s",
%(err)s, Gpu_error(%(ctx)s->ops, %(ctx)s->ctx, %(err)s)); err, Gpu_error(%(ctx)s->ops, %(ctx)s->ctx, err));
%(fail)s %(fail)s
} }
} }
...@@ -371,7 +371,7 @@ class GpuFromHost(Op): ...@@ -371,7 +371,7 @@ class GpuFromHost(Op):
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
return """ return """
PyGpuArrayObject *%(name)s_tmp; PyArrayObject *%(name)s_tmp;
%(name)s_tmp = PyArray_GETCONTIGUOUS(%(inp)s); %(name)s_tmp = PyArray_GETCONTIGUOUS(%(inp)s);
if (%(name)s_tmp == NULL) if (%(name)s_tmp == NULL)
%(fail)s %(fail)s
...@@ -551,9 +551,9 @@ class GpuAlloc(HideC, Alloc): ...@@ -551,9 +551,9 @@ class GpuAlloc(HideC, Alloc):
if (err != GA_NO_ERROR) if (err != GA_NO_ERROR)
{ {
PyErr_Format(PyExc_MemoryError, PyErr_Format(PyExc_MemoryError,
"GpuAlloc: Error memsetting %%d" "GpuAlloc: Error memsetting %%llu"
" element of device memory to 0.", " element of device memory to 0.",
PyGpuArray_SIZE(%(zz)s)); (unsigned long long)PyGpuArray_SIZE(%(zz)s));
%(fail)s; %(fail)s;
} }
} }
......
...@@ -173,7 +173,8 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise): ...@@ -173,7 +173,8 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
("npy_float64", "ga_double"), ("npy_float64", "ga_double"),
]: ]:
kop = kop.replace(npy, ga) kop = kop.replace(npy, ga)
return ElemwiseKernel(None, inps + outs, kop, preamble=support_code) return ElemwiseKernel(self.get_context(node), inps + outs, kop,
preamble=support_code)
def c_headers(self): def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray/types.h>'] return ['<numpy_compat.h>', '<gpuarray/types.h>']
...@@ -269,7 +270,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise): ...@@ -269,7 +270,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
if (%(nd)s != PyGpuArray_NDIM(%(iname)s)) if (%(nd)s != PyGpuArray_NDIM(%(iname)s))
{ {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"need %(nd)s dims, not %%i", "need %(nd)s dims, not %%u",
PyGpuArray_NDIM(%(iname)s)); PyGpuArray_NDIM(%(iname)s));
%(fail)s; %(fail)s;
} }
...@@ -282,11 +283,11 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise): ...@@ -282,11 +283,11 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
{ {
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"GpuElemwise. Input dimension mis-match. Input" "GpuElemwise. Input dimension mis-match. Input"
" %(idx)d (indices start at 0) has shape[%%i] == %%i" " %(idx)d (indices start at 0) has shape[%%d] == %%llu"
", but the output's size on that axis is %%i.", ", but the output's size on that axis is %%llu.",
i, i,
PyGpuArray_DIMS(%(iname)s)[i], (unsigned long long)PyGpuArray_DIMS(%(iname)s)[i],
dims[i] (unsigned long long)dims[i]
); );
%(fail)s; %(fail)s;
} }
...@@ -333,11 +334,11 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise): ...@@ -333,11 +334,11 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"GpuElemwise. Output dimension mis-match. Output" "GpuElemwise. Output dimension mis-match. Output"
" %(idx)d (indices start at 0), working inplace" " %(idx)d (indices start at 0), working inplace"
" on input %(input_idx)s, has shape[%%i] == %%i" " on input %(input_idx)s, has shape[%%i] == %%llu"
", but the output's size on that axis is %%i.", ", but the output's size on that axis is %%llu.",
i, i,
PyGpuArray_DIMS(%(oname)s)[i], (unsigned long long)PyGpuArray_DIMS(%(oname)s)[i],
dims[i] (unsigned long long)dims[i]
); );
Py_DECREF(%(oname)s); Py_DECREF(%(oname)s);
%(oname)s = NULL; %(oname)s = NULL;
...@@ -446,10 +447,12 @@ class GpuDimShuffle(HideC, DimShuffle): ...@@ -446,10 +447,12 @@ class GpuDimShuffle(HideC, DimShuffle):
_f16_ok = True _f16_ok = True
def make_node(self, input): def make_node(self, input):
ctx_name = infer_context_name(input)
res = DimShuffle.make_node(self, input) res = DimShuffle.make_node(self, input)
otype = GpuArrayType(dtype=res.outputs[0].type.dtype, otype = GpuArrayType(dtype=res.outputs[0].type.dtype,
broadcastable=res.outputs[0].type.broadcastable) broadcastable=res.outputs[0].type.broadcastable,
input = as_gpuarray_variable(input) context_name=ctx_name)
input = as_gpuarray_variable(input, ctx_name)
return Apply(self, [input], [otype()]) return Apply(self, [input], [otype()])
def __str__(self): def __str__(self):
...@@ -650,7 +653,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -650,7 +653,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
def get_context(self, node): def get_context(self, node):
return node.inputs[0].type.context return node.inputs[0].type.context
def perform(self, node, inp, out): def perform(self, node, inp, out, ctx):
raise MethodNotDefined("") raise MethodNotDefined("")
def supports_c_code(self, inputs): def supports_c_code(self, inputs):
...@@ -680,7 +683,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -680,7 +683,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
inp = ['fake_input_name_%d' % i for i in xrange(len(inputs))] inp = ['fake_input_name_%d' % i for i in xrange(len(inputs))]
out = ['fake_output_name_%d' % i for i in xrange(len(node.outputs))] out = ['fake_output_name_%d' % i for i in xrange(len(node.outputs))]
sub = {'fail': 'fake failure code'} sub = {'fail': 'fake failure code', 'context': 'fake context'}
try: try:
self.c_code(node, name, inp, out, sub) self.c_code(node, name, inp, out, sub)
...@@ -715,7 +718,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -715,7 +718,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
if (PyGpuArray_NDIM(%(x)s) != %(nd_in)s) if (PyGpuArray_NDIM(%(x)s) != %(nd_in)s)
{ {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"required nd=%(nd_in)s, got nd=%%i", PyGpuArray_NDIM(%(x)s)); "required nd=%(nd_in)s, got nd=%%u", PyGpuArray_NDIM(%(x)s));
%(fail)s; %(fail)s;
} }
""" % locals(), file=sio) """ % locals(), file=sio)
...@@ -1320,8 +1323,8 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1320,8 +1323,8 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
(void *)%(z)s->ga.data, (void *)%(z)s->ga.data,
(void *)&%(z)s->ga.offset}; (void *)&%(z)s->ga.offset};
if (verbose) printf("running kernel_reduce_ccontig_%(name)s" if (verbose) printf("running kernel_reduce_ccontig_%(name)s"
" n_threads=%%lu, size=%%lu, ndim=%%d\\n", " n_threads=%%llu, size=%%llu, ndim=%%u\\n",
n_threads,numEls, n_threads, numEls,
PyGpuArray_NDIM(%(x)s)); PyGpuArray_NDIM(%(x)s));
size_t n_shared = sizeof(%(acc_dtype)s) * n_threads; size_t n_shared = sizeof(%(acc_dtype)s) * n_threads;
int err = GpuKernel_call(&%(k_var)s, 1, &n_threads, &n_blocks, n_shared, kernel_params); int err = GpuKernel_call(&%(k_var)s, 1, &n_threads, &n_blocks, n_shared, kernel_params);
...@@ -1503,9 +1506,9 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1503,9 +1506,9 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
size_t n_blocks[3] = {1, std::min(PyGpuArray_DIMS(%(x)s)[1], (size_t) 4096), 1}; size_t n_blocks[3] = {1, std::min(PyGpuArray_DIMS(%(x)s)[1], (size_t) 4096), 1};
if (verbose) { if (verbose) {
fprintf(stderr, fprintf(stderr,
"running kernel_reduce_10_%(name)s n_blocks=(%%i,%%i)\\n", "running kernel_reduce_10_%(name)s n_blocks=(%%llu,%%llu)\\n",
n_blocks[0], (unsigned long long)n_blocks[0],
n_blocks[1]); (unsigned long long)n_blocks[1]);
} }
assert(PyGpuArray_DIMS(%(x)s)[1] == PyGpuArray_DIMS(%(z)s)[0]); assert(PyGpuArray_DIMS(%(x)s)[1] == PyGpuArray_DIMS(%(z)s)[0]);
size_t n_shared = sizeof(%(acc_dtype)s) * n_threads[0]; size_t n_shared = sizeof(%(acc_dtype)s) * n_threads[0];
......
...@@ -215,7 +215,7 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor): ...@@ -215,7 +215,7 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
iadd_node = gop(xview, y).owner iadd_node = gop(xview, y).owner
self.iadd_node = iadd_node self.iadd_node = iadd_node
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_, ctx):
out, = out_ out, = out_
x, y = inputs[:2] x, y = inputs[:2]
indices = list(reversed(inputs[2:])) indices = list(reversed(inputs[2:]))
...@@ -326,7 +326,7 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor): ...@@ -326,7 +326,7 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
%(view_ndim)s, %(view_ndim)s,
dims, dims,
xview_strides, xview_strides,
%(x)s->ctx, %(x)s->context,
1, 1,
(PyObject *)%(x)s, (PyObject *)%(x)s,
(PyObject *)&PyGpuArrayType); (PyObject *)&PyGpuArrayType);
...@@ -360,10 +360,10 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor): ...@@ -360,10 +360,10 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
""" """
return """GpuArray_setarray(&%(view)s->ga, &%(source)s->ga)""" % locals() return """GpuArray_setarray(&%(view)s->ga, &%(source)s->ga)""" % locals()
def c_support_code_apply(self, node, nodename): def c_support_code_struct(self, node, nodename):
gop = self.iadd_node.op gop = self.iadd_node.op
sub_name = nodename + "_add_to_zview" sub_name = nodename + "_add_to_zview"
ret = gop.c_support_code_apply(self.iadd_node, sub_name) ret = gop.c_support_code_struct(self.iadd_node, sub_name)
ret += """ ret += """
PyGpuArrayObject* inc_sub_iadd_%(nodename)s(PyGpuArrayObject* dst, PyGpuArrayObject* inc_sub_iadd_%(nodename)s(PyGpuArrayObject* dst,
PyGpuArrayObject* src){ PyGpuArrayObject* src){
...@@ -371,10 +371,11 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor): ...@@ -371,10 +371,11 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
""" % locals() """ % locals()
inputs = ["dst", "src"] inputs = ["dst", "src"]
outputs = ["ret"] outputs = ["ret"]
sub = {"fail": "return NULL;", "context": "dst->ctx"} sub = {"fail": "return NULL;", "context": "dst->context"}
ret += gop.c_code(self.iadd_node, sub_name, inputs, outputs, sub) ret += gop.c_code(self.iadd_node, sub_name, inputs, outputs, sub)
ret += """ ret += """
return ret; return ret;
} }
""" """
return ret return ret
...@@ -608,7 +609,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1): ...@@ -608,7 +609,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
return gof.Apply(self, [x_, y_, ilist_], [x_.type()]) return gof.Apply(self, [x_, y_, ilist_], [x_.type()])
def get_context(self, node): def get_context(self, node):
return self.node.outputs[0].type.context return node.outputs[0].type.context
def c_code_cache_version(self): def c_code_cache_version(self):
return (6,) return (6,)
...@@ -617,6 +618,9 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1): ...@@ -617,6 +618,9 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
return ['<numpy_compat.h>', '<gpuarray_helper.h>', return ['<numpy_compat.h>', '<gpuarray_helper.h>',
'<gpuarray/types.h>'] '<gpuarray/types.h>']
def c_header_dirs(self):
return [os.path.dirname(__file__)]
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
ctx = self.get_context(node) ctx = self.get_context(node)
if ctx.kind != 'cuda': if ctx.kind != 'cuda':
...@@ -754,7 +758,7 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) { ...@@ -754,7 +758,7 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
return [Kernel(code=code, name=kname, params=params, return [Kernel(code=code, name=kname, params=params,
flags=flags, objvar=k_var)] flags=flags, objvar=k_var)]
def c_support_code_apply(self, node, nodename): def c_support_code_struct(self, node, nodename):
dtype_x = node.inputs[0].dtype dtype_x = node.inputs[0].dtype
dtype_y = node.inputs[1].dtype dtype_y = node.inputs[1].dtype
dtype_ind = node.inputs[2].dtype dtype_ind = node.inputs[2].dtype
...@@ -765,7 +769,7 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) { ...@@ -765,7 +769,7 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
itemsize_out = numpy.dtype(dtype_out).itemsize itemsize_out = numpy.dtype(dtype_out).itemsize
k_var = "k_vector_add_fast_" + nodename k_var = "k_vector_add_fast_" + nodename
return super(GpuAdvancedIncSubtensor1_dev20, self).c_support_code_apply(node, nodename) + """ return super(GpuAdvancedIncSubtensor1_dev20, self).c_support_code_struct(node, nodename) + """
int GpuArray_vector_add_fast(PyGpuArrayObject* py_self, int GpuArray_vector_add_fast(PyGpuArrayObject* py_self,
PyGpuArrayObject* py_other, PyGpuArrayObject* py_other,
PyGpuArrayObject *indices_arr) PyGpuArrayObject *indices_arr)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论