提交 c2a36918 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add c_code support for the type and a c_code version of HostFromGpu.

上级 f1d4a78b
...@@ -43,6 +43,45 @@ class HostFromGpu(Op): ...@@ -43,6 +43,45 @@ class HostFromGpu(Op):
z, = out z, = out
z[0] = numpy.asarray(x) z[0] = numpy.asarray(x)
def c_code(self, node, name, inputs, outputs, sub):
    """Return C code transferring a GpuArray back to a host ndarray.

    The emitted C code:
      1. makes a C-contiguous temporary copy of the input GpuArray when
         it is not one-segment, so it can be read in a single transfer;
      2. allocates an empty ndarray of matching dtype and shape
         (Fortran order when the source is F- but not C-contiguous);
      3. reads the device data into the ndarray's buffer;
      4. clears the temporary copy on every exit path, if one was made.

    Fix: the local ``fail`` was assigned from ``sub['fail']`` but never
    used (the mapping looked it up a second time); it is now used.
    """
    inp = inputs[0]
    out = outputs[0]
    fail = sub['fail']
    return """{
    GpuArray %(name)s_ga_s;
    GpuArray *%(name)s_ga;
    int %(name)serr;
    PyArray_Descr *%(name)s_dtype;
    if (!GpuArray_ISONESEGMENT(&%(inp)s->ga)) {
        if (GpuArray_copy(&%(name)s_ga_s, &%(inp)s->ga, GA_C_ORDER) != GA_NO_ERROR) {
            %(fail)s;
        }
        %(name)s_ga = &%(name)s_ga_s;
    } else {
        %(name)s_ga = &%(inp)s->ga;
    }
    %(name)s_dtype = typecode_to_dtype(%(inp)s->ga.typecode);
    // PyArray_Empty below steals a reference to the dtype we pass it
    // so we need an extra one to spare.
    Py_INCREF(%(name)s_dtype);
    %(out)s = (PyArrayObject *)PyArray_Empty(%(inp)s->ga.nd,
                                (npy_intp *)%(inp)s->ga.dimensions,
                                %(name)s_dtype,
                                (%(inp)s->ga.flags & GA_F_CONTIGUOUS) &&
                                !(%(inp)s->ga.flags & GA_C_CONTIGUOUS));
    if (%(out)s == NULL) {
        if (%(name)s_ga == &%(name)s_ga_s) GpuArray_clear(%(name)s_ga);
        %(fail)s
    }
    %(name)serr = GpuArray_read(PyArray_DATA(%(out)s),
                                PyArray_NBYTES(%(out)s),
                                %(name)s_ga);
    if (%(name)s_ga == &%(name)s_ga_s) GpuArray_clear(%(name)s_ga);
    if (%(name)serr != GA_NO_ERROR) {
        %(fail)s
    }
    }""" % {'name': name, 'fail': fail, 'inp': inp, 'out': out}
def grad(self, inputs, grads):
    """The gradient of a host transfer is the transfer back to the GPU."""
    gz, = grads
    return [gpu_from_host(gz)]
......
...@@ -114,8 +114,59 @@ class GpuArrayType(Type): ...@@ -114,8 +114,59 @@ class GpuArrayType(Type):
hash(self.kind) ^ hash(self.context)) hash(self.kind) ^ hash(self.context))
def __str__(self): def __str__(self):
return "GpuArray<%s>" % self.dtype return "GpuArray[%s, %s]<%s>" % (self.kind, self.context, self.dtype)
def c_declare(self, name, sub):
    """C declaration of the storage variable for this type."""
    decl = "GpuArrayObject *%s;" % (name,)
    return decl
def c_init(self, name, sub):
    """C code initializing the storage variable to NULL."""
    init = "%s = NULL;" % (name,)
    return init
def c_extract(self, name, sub):
    """C code extracting a GpuArrayObject from its Python wrapper.

    Rejects None and non-GpuArray objects with a ValueError, then
    takes a new reference to the extracted object.
    """
    # TODO I don't check broadcast stuff for now.
    template = """
    %(name)s = NULL;
    if (py_%(name)s == Py_None) {
        PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
        %(fail)s
    }
    if (py_%(name)s->ob_type != &GpuArrayType &&
        !PyObject_TypeCheck(py_%(name)s, &GpuArrayType)) {
        PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
        %(fail)s
    }
    %(name)s = (GpuArrayObject *)py_%(name)s;
    Py_INCREF(%(name)s);
    """
    return template % dict(name=name, fail=sub['fail'])
def c_cleanup(self, name, sub):
    """No per-call C cleanup is needed for this type."""
    return ""
def c_sync(self, name, sub):
    """C code publishing the computed value back into py_<name>.

    Stores Py_None when the C variable is NULL; otherwise swaps the
    old Python reference for a new reference to the C-level object.
    """
    code = """
    if (!%(name)s) {
        Py_XDECREF(py_%(name)s);
        Py_INCREF(Py_None);
        py_%(name)s = Py_None;
    } else if ((void *)py_%(name)s != (void *)%(name)s) {
        Py_XDECREF(py_%(name)s);
        py_%(name)s = (PyObject *)%(name)s;
        Py_INCREF(py_%(name)s);
    }
    """
    return code % dict(name=name)
def c_headers(self):
    """Headers the generated C code depends on (pygpu + compyte)."""
    pygpu_headers = ['pygpu/gpuarray.h']
    compyte_headers = ['compyte/array.h', 'compyte/kernel.h',
                       'compyte/error.h']
    return pygpu_headers + compyte_headers
def c_libraries(self):
    """Library to link the generated C code against."""
    return ['compyte']
def c_header_dirs(self):
    """Include path where the pygpu headers are installed."""
    include_dir = pygpu.get_include()
    return [include_dir]
def c_code_cache_version(self):
    """Version tuple for the C code cache."""
    # TODO: This is temporary
    return ()
class _operators(tensor.basic._tensor_py_operators): class _operators(tensor.basic._tensor_py_operators):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论