提交 01b993dd authored 作者: Arnaud Bergeron

Update to the new API and use the default context.

上级 dc5d54fb
......@@ -32,21 +32,8 @@ from type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
def init_dev(dev):
import globals
if dev.startswith('cuda'):
# format is cuda<devnum>
globals.kind = 'cuda'
devnum = int(dev[4:])
elif dev.startswith('opencl'):
# format is opencl<platnum>:<devnum>
globals.kind = 'opencl'
devspec = dev[6:]
plat, dev = devspec.split(':')
devnum = int(dev)|(int(plat)<<16)
else:
globals.kind = None
if globals.kind:
globals.context = pygpu.gpuarray.init(globals.kind, devnum)
context = pygpu.init(dev)
pygpu.set_default_context(context)
if pygpu:
try:
......
......@@ -20,8 +20,8 @@ def as_gpuarray_variable(x):
return gpu_from_host(tensor_x)
def as_gpuarray(x, kind, context):
return gpuarray.array(x, kind=kind, context=context, copy=False)
def as_gpuarray(x):
return gpuarray.array(x, copy=False)
class HostFromGpu(Op):
......@@ -76,7 +76,7 @@ class HostFromGpu(Op):
%(name)s_ga);
if (%(name)s_ga == &%(name)s_ga_s) GpuArray_clear(%(name)s_ga);
if (%(name)serr != GA_NO_ERROR) {
PyErr_SetSring(PyExc_RuntimeError, "Could not read device data.");
PyErr_SetString(PyExc_RuntimeError, "Could not read device data.");
%(fail)s
}
""" % {'name': name, 'fail': sub['fail'], 'inp': inputs[0],
......@@ -120,7 +120,7 @@ class GpuFromHost(Op):
x, = inp
z, = out
type = node.outputs[0].type
z[0] = gpuarray.array(x, kind=type.kind, context=type.context)
z[0] = gpuarray.array(x)
def grad(self, inputs, grads):
gz, = grads
......@@ -141,17 +141,23 @@ class GpuFromHost(Op):
return """
PyArrayObject *%(name)s_tmp;
int %(name)serr;
if ((PyObject *)GpuArray_default_context == Py_None) {
PyErr_SetString(PyExc_ValueError, "No default context, gpuarray not initialized?");
%(fail)s
}
%(name)s_tmp = PyArray_GETCONTIGUOUS(%(inp)s);
if (%(name)s_tmp == NULL) {
%(fail)s
}
%(out)s = new_GpuArray((PyObject *)&GpuArrayType);
%(out)s = new_GpuArray((PyObject *)&GpuArrayType, GpuArray_default_context);
if (%(out)s == NULL) {
Py_DECREF(%(name)s_tmp);
%(fail)s
}
%(name)serr = GpuArray_empty(&%(out)s->ga, compyte_get_ops("%(kind)s"),
(void *)%(ctx)s, %(typecode)s,
%(name)serr = GpuArray_empty(&%(out)s->ga,
GpuArray_default_context->ops,
GpuArray_default_context->ctx,
%(typecode)s,
PyArray_NDIM(%(inp)s),
(size_t *)PyArray_DIMS(%(inp)s),
GA_C_ORDER);
......@@ -170,12 +176,12 @@ class GpuFromHost(Op):
PyErr_SetString(PyExc_RuntimeError, "Could not copy array data to device");
%(fail)s
}
""" % {'name': name, 'kind': type.kind, 'ctx': hex(type.context),
'inp': inputs[0], 'out': outputs[0], 'fail': sub['fail'],
""" % {'name': name, 'inp': inputs[0],
'out': outputs[0], 'fail': sub['fail'],
'typecode': type.typecode}
# Don't implement c_code_cache_version since we harcode the ctx address
# in the code block and this will not work across processes
def c_code_cache_version(self):
return (0,)
gpu_from_host = GpuFromHost()
......@@ -197,8 +203,7 @@ class GpuFromCuda(Op):
def perform(self, node, inp, out):
x, = inp
z, = out
z[0] = gpuarray.array(numpy.asarray(x), kind=globals.kind,
context=globals.context)
z[0] = gpuarray.array(numpy.asarray(x))
def grad(self, inputs, grads):
gz, = grads
......@@ -247,9 +252,6 @@ class GpuFromCuda(Op):
"""
def c_code(self, node, name, input, output, sub):
type = node.outputs[0].type
if type.kind != "cuda":
raise RuntimeError("GpuFromCuda for non-cuda dest")
return """
int %(name)serr;
gpudata *%(name)sdata;
......@@ -258,8 +260,8 @@ class GpuFromCuda(Op):
ssize_t *%(name)sstr;
cuCtxGetCurrent(&%(name)scur);
if (%(name)scur != cuda_get_ctx((void *)%(ctx)s)) {
PyErr_SetString(PyErr_ValueError, "Ambient context is not the same as output context.");
if (%(name)scur != cuda_get_ctx(GpuArray_default_context->ctx)) {
PyErr_SetString(PyErr_ValueError, "Ambient cuda context is not the same as output context.");
%(fail)s
}
%(name)sdims = (size_t *)calloc(%(in)s->nd, sizeof(size_t));
......@@ -286,7 +288,8 @@ class GpuFromCuda(Op):
%(fail)s
}
%(name)sdata = cuda_make_buf((void *)%(ctx)s, (CUdeviceptr)%(in)s->devdata,
%(name)sdata = cuda_make_buf(GpuArray_default_context->ctx,
(CUdeviceptr)%(in)s->devdata,
(size_t)%(in)s->data_allocated);
if (%(name)sdata == NULL) {
Py_DECREF(%(out)s);
......@@ -295,7 +298,8 @@ class GpuFromCuda(Op):
PyErr_SetString(PyExc_MemoryError, "Could not allocate gpudata structure.");
%(fail)s
}
%(name)serr = GpuArray_fromdata(&%(out)s->ga, compyte_get_ops("cuda"),
%(name)serr = GpuArray_fromdata(&%(out)s->ga,
GpuArray_default_context->ops,
%(name)sdata, 0, GA_FLOAT, %(in)s->nd,
%(name)sdims, %(name)sstr, 1);
free(%(name)sdims);
......@@ -307,11 +311,10 @@ class GpuFromCuda(Op):
}
Py_INCREF(%(in)s);
%(out)s->base = %(in)s;
""" % {'name':name, 'ctx': hex(type.context), 'in': inputs[0],
'out': outputs[0], 'fail': sub['fail']}
# Don't implement c_code_cache_version since we harcode the ctx address
# in the code block and this will not work across processes
""" % {'name':name, 'in': inputs[0], 'out': outputs[0],
'fail': sub['fail']}
def c_code_cache_version(self):
return (0,)
gpu_from_cuda = GpuFromCuda()
# This modules serves to stuff global values (like kind and context)
kind = None
context = None
......@@ -20,7 +20,7 @@ def test_transfer():
g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')
av = numpy.asarray(numpy.random.rand(5, 4), dtype='float32')
gv = gpuarray.array(av, kind=g.type.kind, context=g.type.context)
gv = gpuarray.array(av)
f = theano.function([a], gpu_from_host(a))
fv = f(av)
......
......@@ -15,21 +15,11 @@ except ImportError:
pass
class GpuArrayType(Type):
def __init__(self, dtype, broadcastable, kind=None, context=None,
name=None):
import globals
if kind is None:
kind = globals.kind
if context is None:
context = globals.context
def __init__(self, dtype, broadcastable, name=None):
# In case this was not provided and no global value is available
if kind is None:
raise RuntimeError("pygpu is not initialized")
self.dtype = str(dtype)
self.broadcastable = tuple(bool(b) for b in broadcastable)
self.ndim = len(self.broadcastable)
self.kind = kind
self.context = context
self.name = name
try:
self.typecode = gpuarray.dtype_to_typecode(self.dtype)
......@@ -42,10 +32,6 @@ class GpuArrayType(Type):
if not isinstance(data, gpuarray.GpuArray):
raise TypeError("%s expected a GpuArray object." % self,
data, type(data))
if self.kind != data.kind:
raise TypeError("kind of GpuArray does not match")
if self.context != data.context:
raise TypeError("context of GpuArray differs")
if self.typecode != data.typecode:
raise TypeError("%s expected typecode %d (dtype %s), "
"got %d (dtype %s)." %
......@@ -54,13 +40,11 @@ class GpuArrayType(Type):
# fallthrough to ndim check
elif allow_downcast:
data = gpuarray.array(data, dtype=self.typecode, copy=False,
kind=self.kind, context=self.context,
ndmin=len(self.broadcastable))
else:
up_dtype = scalar.upcast(self.dtype, data.dtype)
if up_dtype == self.dtype:
data = gpuarray.array(data, dtype=self.typecode, copy=False,
kind=self.kind, context=self.context)
data = gpuarray.array(data, dtype=self.typecode, copy=False)
else:
raise TypeError("%s cannot store a value of dtype %s "
"without risking loss of precision." %
......@@ -98,8 +82,7 @@ class GpuArrayType(Type):
return numpy.asarray(res).all()
def value_zeros(self, shape):
return pygpu.gpuarray.zeros(shape, dtype=self.typecode, kind=self.kind,
context=self.context)
return pygpu.gpuarray.zeros(shape, dtype=self.typecode)
def make_variable(self, name=None):
return self.Variable(self, name=name)
......@@ -107,16 +90,13 @@ class GpuArrayType(Type):
def __eq__(self, other):
return (type(self) == type(other) and
self.typecode == other.typecode and
self.broadcastable == other.broadcastable and
self.kind == other.kind and
self.context == other.context)
self.broadcastable == other.broadcastable)
def __hash__(self):
return (hash(self.typecode) ^ hash(self.broadcastable) ^
hash(self.kind) ^ hash(self.context))
return (hash(self.typecode) ^ hash(self.broadcastable))
def __str__(self):
return "GpuArray[%s, %s]<%s>" % (self.kind, self.context, self.dtype)
return "GpuArray[%s, %s]<%s>" % (self.dtype,)
def c_declare(self, name, sub):
return "GpuArrayObject *%s;" % (name,)
......@@ -144,7 +124,7 @@ class GpuArrayType(Type):
""" % {'name': name, 'fail': sub['fail']}
def c_cleanup(self, name, sub):
return ""
return "Py_XDECREF(%(name)s); %(name)s = NULL;" % {'name': name }
def c_sync(self, name, sub):
return """
......@@ -167,7 +147,7 @@ class GpuArrayType(Type):
return [pygpu.get_include()]
def c_code_cache_version(self):
return () # TODO: This is temporary
return (0,)
class _operators(tensor.basic._tensor_py_operators):
......@@ -229,15 +209,14 @@ GpuArrayType.SharedVariable = GpuArraySharedVariable
def gpuarray_shared_constructor(value, name=None, strict=False,
allow_downcast=None, borrow=False,
broadcastable=None, kind=None, context=None):
broadcastable=None):
"""SharedVariable constructor for GpuArrayType"""
if not isinstance(value, (numpy.ndarray, pygpu.gpuarray.GpuArray)):
raise TypeError('ndarray or GpuArray required')
if broadcastable is None:
broadcastable = (False,) * value.ndim
type = GpuArrayType(value.dtype, broadcastable, kind=kind, context=context)
deviceval = pygpu.gpuarray.array(value, copy=(not borrow), kind=type.kind,
context=type.context)
type = GpuArrayType(value.dtype, broadcastable)
deviceval = pygpu.gpuarray.array(value, copy=(not borrow))
return GpuArraySharedVariable(type=type, value=deviceval, name=name,
strict=strict)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论