提交 c2a36918 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add c_code support for the type and a c_code version of HostFromGpu.

上级 f1d4a78b
...@@ -43,6 +43,45 @@ class HostFromGpu(Op): ...@@ -43,6 +43,45 @@ class HostFromGpu(Op):
z, = out z, = out
z[0] = numpy.asarray(x) z[0] = numpy.asarray(x)
def c_code(self, node, name, inputs, outputs, sub):
    """Return C code transferring a GpuArray back to a host ndarray.

    The emitted C code:
      1. makes a C-contiguous temporary copy of the input GpuArray when
         it is not one-segment, so it can be read in a single transfer;
      2. allocates an empty ndarray of matching dtype and shape
         (Fortran order when the source is F- but not C-contiguous);
      3. reads the device data into the ndarray's buffer;
      4. clears the temporary copy on every exit path, if one was made.

    Fix: the local ``fail`` was assigned from ``sub['fail']`` but never
    used (the mapping looked it up a second time); it is now used.
    """
    inp = inputs[0]
    out = outputs[0]
    fail = sub['fail']
    return """{
    GpuArray %(name)s_ga_s;
    GpuArray *%(name)s_ga;
    int %(name)serr;
    PyArray_Descr *%(name)s_dtype;
    if (!GpuArray_ISONESEGMENT(&%(inp)s->ga)) {
        if (GpuArray_copy(&%(name)s_ga_s, &%(inp)s->ga, GA_C_ORDER) != GA_NO_ERROR) {
            %(fail)s;
        }
        %(name)s_ga = &%(name)s_ga_s;
    } else {
        %(name)s_ga = &%(inp)s->ga;
    }
    %(name)s_dtype = typecode_to_dtype(%(inp)s->ga.typecode);
    // PyArray_Empty below steals a reference to the dtype we pass it
    // so we need an extra one to spare.
    Py_INCREF(%(name)s_dtype);
    %(out)s = (PyArrayObject *)PyArray_Empty(%(inp)s->ga.nd,
                                (npy_intp *)%(inp)s->ga.dimensions,
                                %(name)s_dtype,
                                (%(inp)s->ga.flags & GA_F_CONTIGUOUS) &&
                                !(%(inp)s->ga.flags & GA_C_CONTIGUOUS));
    if (%(out)s == NULL) {
        if (%(name)s_ga == &%(name)s_ga_s) GpuArray_clear(%(name)s_ga);
        %(fail)s
    }
    %(name)serr = GpuArray_read(PyArray_DATA(%(out)s),
                                PyArray_NBYTES(%(out)s),
                                %(name)s_ga);
    if (%(name)s_ga == &%(name)s_ga_s) GpuArray_clear(%(name)s_ga);
    if (%(name)serr != GA_NO_ERROR) {
        %(fail)s
    }
    }""" % {'name': name, 'fail': fail, 'inp': inp, 'out': out}
def grad(self, inputs, grads):
    """The gradient of a host transfer is the transfer back to the GPU."""
    gz, = grads
    return [gpu_from_host(gz)]
......
...@@ -114,8 +114,59 @@ class GpuArrayType(Type): ...@@ -114,8 +114,59 @@ class GpuArrayType(Type):
hash(self.kind) ^ hash(self.context)) hash(self.kind) ^ hash(self.context))
def __str__(self): def __str__(self):
return "GpuArray<%s>" % self.dtype return "GpuArray[%s, %s]<%s>" % (self.kind, self.context, self.dtype)
def c_declare(self, name, sub):
    """C declaration of the storage variable for this type."""
    decl = "GpuArrayObject *%s;" % (name,)
    return decl
def c_init(self, name, sub):
    """C code initializing the storage variable to NULL."""
    init = "%s = NULL;" % (name,)
    return init
def c_extract(self, name, sub):
    """C code extracting a GpuArrayObject from its Python wrapper.

    Rejects None and non-GpuArray objects with a ValueError, then
    takes a new reference to the extracted object.
    """
    # TODO I don't check broadcast stuff for now.
    template = """
    %(name)s = NULL;
    if (py_%(name)s == Py_None) {
        PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
        %(fail)s
    }
    if (py_%(name)s->ob_type != &GpuArrayType &&
        !PyObject_TypeCheck(py_%(name)s, &GpuArrayType)) {
        PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
        %(fail)s
    }
    %(name)s = (GpuArrayObject *)py_%(name)s;
    Py_INCREF(%(name)s);
    """
    return template % dict(name=name, fail=sub['fail'])
def c_cleanup(self, name, sub):
    """No per-call C cleanup is needed for this type."""
    return ""
def c_sync(self, name, sub):
    """C code publishing the computed value back into py_<name>.

    Stores Py_None when the C variable is NULL; otherwise swaps the
    old Python reference for a new reference to the C-level object.
    """
    code = """
    if (!%(name)s) {
        Py_XDECREF(py_%(name)s);
        Py_INCREF(Py_None);
        py_%(name)s = Py_None;
    } else if ((void *)py_%(name)s != (void *)%(name)s) {
        Py_XDECREF(py_%(name)s);
        py_%(name)s = (PyObject *)%(name)s;
        Py_INCREF(py_%(name)s);
    }
    """
    return code % dict(name=name)
def c_headers(self):
    """Headers the generated C code depends on (pygpu + compyte)."""
    pygpu_headers = ['pygpu/gpuarray.h']
    compyte_headers = ['compyte/array.h', 'compyte/kernel.h',
                       'compyte/error.h']
    return pygpu_headers + compyte_headers
def c_libraries(self):
    """Library to link the generated C code against."""
    return ['compyte']
def c_header_dirs(self):
    """Include path where the pygpu headers are installed."""
    include_dir = pygpu.get_include()
    return [include_dir]
def c_code_cache_version(self):
    """Version tuple for the C code cache."""
    # TODO: This is temporary
    return ()
class _operators(tensor.basic._tensor_py_operators): class _operators(tensor.basic._tensor_py_operators):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论