提交 7359170d authored 作者: Frédéric Bastien

Merge pull request #2907 from abergeron/new_call_proto

Follow the change in call protocol in libgpuarray.
...@@ -33,6 +33,11 @@ from . import opt
def init_dev(dev):
if pygpu.gpuarray.api_version() != (-10000, 0):
raise RuntimeError("Wrong API version for gpuarray:",
pygpu.gpuarray.api_version(),
"Make sure Theano and libgpuarray/pygpu "
"are in sync.")
global pygpu_activated
context = pygpu.init(dev)
pygpu.set_default_context(context)
...
...@@ -944,6 +944,7 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) {
kname = self.gpu_kernels(node, name)[0].objvar
s = """
size_t dims[2] = {0, 0};
size_t ls, gs;
void *args[3];
int err;
...@@ -959,10 +960,12 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) {
%(fail)s
}
args[0] = &%(z)s->ga; args[0] = %(z)s->ga.data;
args[1] = &dims[0];
args[2] = &dims[1];
err = GpuKernel_call(&%(kname)s, 0, 1, 256, args); ls = 1;
gs = 256;
err = GpuKernel_call(&%(kname)s, 1, &ls, &gs, 0, args);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"gpuarray error: kEye: %%s. n%%lu, m=%%lu.",
...@@ -978,4 +981,4 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) {
return s
def c_code_cache_version(self):
return (3, self.GpuKernelBase_version) return (4, self.GpuKernelBase_version)
...@@ -2664,6 +2664,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
nd_out = node.outputs[0].ndim
code = """
size_t gs = 1;
size_t ls;
unsigned int n = 1;
unsigned int proxy_dim[%(nd_in)s];
unsigned int proxy_off;
...@@ -2727,7 +2728,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
# data in the proper type.
code += """
args[0] = &n;
args[1] = &tmp->ga; args[1] = tmp->ga.data;
""" % dict(output=output)
p = 2
...@@ -2742,7 +2743,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
code += "gs *= %(input)s->ga.dimensions[%(i)s];" % dict(input=input, i=i)
code += """
args[%(p)s] = &%(input)s->ga; args[%(p)s] = %(input)s->ga.data;
proxy_off = %(input)s->ga.offset;
args[%(p)s+1] = &proxy_off;
""" % dict(p=p, input=input)
...@@ -2758,7 +2759,8 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
code += """
if (gs == 0) gs = 1;
n /= gs;
err = GpuKernel_call(&%(k_var)s, 0, %(ls)s, gs, args); ls = %(ls)s;
err = GpuKernel_call(&%(k_var)s, 1, &ls, &gs, 0, args);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"gpuarray error: GpuCAReduceCPY: %%s.",
...@@ -2788,7 +2790,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
return code
def c_code_cache_version(self):
return (0, self.GpuKernelBase_version) return (1, self.GpuKernelBase_version)
def generate_kernel(self, node, odtype, redux):
if isinstance(self.scalar_op, scalar.basic.Add):
...
...@@ -994,11 +994,18 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
{
void *args[4];
args[0] = &%(o_sample)s->ga; size_t ls = 0, gs = 0;
args[1] = &%(o_rstate)s->ga; args[0] = %(o_sample)s->ga.data;
args[1] = %(o_rstate)s->ga.data;
args[2] = &n_elements;
args[3] = &n_streams;
int err = GpuKernel_call(&%(kname)s, n_elements, 0, 0, args); int err = GpuKernel_sched(&%(kname)s, n_elements, &ls, &gs);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuKernel_sched: %%s\\n",
GpuKernel_error(&%(kname)s, err));
%(fail)s
}
err = GpuKernel_call(&%(kname)s, 1, &ls, &gs, 0, args);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuKernel_call: %%s\\n",
GpuKernel_error(&%(kname)s, err));
...@@ -1008,7 +1015,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
""" % locals()
def c_code_cache_version(self):
return (6, self.GpuKernelBase_version) return (7, self.GpuKernelBase_version)
def guess_n_streams(size, warn=False):
...
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论