提交 01b993dd authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Update to the new API and use the default context.

上级 dc5d54fb
...@@ -32,21 +32,8 @@ from type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant, ...@@ -32,21 +32,8 @@ from type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
def init_dev(dev): def init_dev(dev):
import globals context = pygpu.init(dev)
if dev.startswith('cuda'): pygpu.set_default_context(context)
# format is cuda<devnum>
globals.kind = 'cuda'
devnum = int(dev[4:])
elif dev.startswith('opencl'):
# format is opencl<platnum>:<devnum>
globals.kind = 'opencl'
devspec = dev[6:]
plat, dev = devspec.split(':')
devnum = int(dev)|(int(plat)<<16)
else:
globals.kind = None
if globals.kind:
globals.context = pygpu.gpuarray.init(globals.kind, devnum)
if pygpu: if pygpu:
try: try:
......
...@@ -20,8 +20,8 @@ def as_gpuarray_variable(x): ...@@ -20,8 +20,8 @@ def as_gpuarray_variable(x):
return gpu_from_host(tensor_x) return gpu_from_host(tensor_x)
def as_gpuarray(x, kind, context): def as_gpuarray(x):
return gpuarray.array(x, kind=kind, context=context, copy=False) return gpuarray.array(x, copy=False)
class HostFromGpu(Op): class HostFromGpu(Op):
...@@ -76,7 +76,7 @@ class HostFromGpu(Op): ...@@ -76,7 +76,7 @@ class HostFromGpu(Op):
%(name)s_ga); %(name)s_ga);
if (%(name)s_ga == &%(name)s_ga_s) GpuArray_clear(%(name)s_ga); if (%(name)s_ga == &%(name)s_ga_s) GpuArray_clear(%(name)s_ga);
if (%(name)serr != GA_NO_ERROR) { if (%(name)serr != GA_NO_ERROR) {
PyErr_SetSring(PyExc_RuntimeError, "Could not read device data."); PyErr_SetString(PyExc_RuntimeError, "Could not read device data.");
%(fail)s %(fail)s
} }
""" % {'name': name, 'fail': sub['fail'], 'inp': inputs[0], """ % {'name': name, 'fail': sub['fail'], 'inp': inputs[0],
...@@ -120,7 +120,7 @@ class GpuFromHost(Op): ...@@ -120,7 +120,7 @@ class GpuFromHost(Op):
x, = inp x, = inp
z, = out z, = out
type = node.outputs[0].type type = node.outputs[0].type
z[0] = gpuarray.array(x, kind=type.kind, context=type.context) z[0] = gpuarray.array(x)
def grad(self, inputs, grads): def grad(self, inputs, grads):
gz, = grads gz, = grads
...@@ -141,17 +141,23 @@ class GpuFromHost(Op): ...@@ -141,17 +141,23 @@ class GpuFromHost(Op):
return """ return """
PyArrayObject *%(name)s_tmp; PyArrayObject *%(name)s_tmp;
int %(name)serr; int %(name)serr;
if ((PyObject *)GpuArray_default_context == Py_None) {
PyErr_SetString(PyExc_ValueError, "No default context, gpuarray not initialized?");
%(fail)s
}
%(name)s_tmp = PyArray_GETCONTIGUOUS(%(inp)s); %(name)s_tmp = PyArray_GETCONTIGUOUS(%(inp)s);
if (%(name)s_tmp == NULL) { if (%(name)s_tmp == NULL) {
%(fail)s %(fail)s
} }
%(out)s = new_GpuArray((PyObject *)&GpuArrayType); %(out)s = new_GpuArray((PyObject *)&GpuArrayType, GpuArray_default_context);
if (%(out)s == NULL) { if (%(out)s == NULL) {
Py_DECREF(%(name)s_tmp); Py_DECREF(%(name)s_tmp);
%(fail)s %(fail)s
} }
%(name)serr = GpuArray_empty(&%(out)s->ga, compyte_get_ops("%(kind)s"), %(name)serr = GpuArray_empty(&%(out)s->ga,
(void *)%(ctx)s, %(typecode)s, GpuArray_default_context->ops,
GpuArray_default_context->ctx,
%(typecode)s,
PyArray_NDIM(%(inp)s), PyArray_NDIM(%(inp)s),
(size_t *)PyArray_DIMS(%(inp)s), (size_t *)PyArray_DIMS(%(inp)s),
GA_C_ORDER); GA_C_ORDER);
...@@ -170,12 +176,12 @@ class GpuFromHost(Op): ...@@ -170,12 +176,12 @@ class GpuFromHost(Op):
PyErr_SetString(PyExc_RuntimeError, "Could not copy array data to device"); PyErr_SetString(PyExc_RuntimeError, "Could not copy array data to device");
%(fail)s %(fail)s
} }
""" % {'name': name, 'kind': type.kind, 'ctx': hex(type.context), """ % {'name': name, 'inp': inputs[0],
'inp': inputs[0], 'out': outputs[0], 'fail': sub['fail'], 'out': outputs[0], 'fail': sub['fail'],
'typecode': type.typecode} 'typecode': type.typecode}
# Don't implement c_code_cache_version since we hardcode the ctx address
# in the code block and this will not work across processes
def c_code_cache_version(self):
return (0,)
gpu_from_host = GpuFromHost() gpu_from_host = GpuFromHost()
...@@ -197,8 +203,7 @@ class GpuFromCuda(Op): ...@@ -197,8 +203,7 @@ class GpuFromCuda(Op):
def perform(self, node, inp, out): def perform(self, node, inp, out):
x, = inp x, = inp
z, = out z, = out
z[0] = gpuarray.array(numpy.asarray(x), kind=globals.kind, z[0] = gpuarray.array(numpy.asarray(x))
context=globals.context)
def grad(self, inputs, grads): def grad(self, inputs, grads):
gz, = grads gz, = grads
...@@ -247,9 +252,6 @@ class GpuFromCuda(Op): ...@@ -247,9 +252,6 @@ class GpuFromCuda(Op):
""" """
def c_code(self, node, name, input, output, sub): def c_code(self, node, name, input, output, sub):
type = node.outputs[0].type
if type.kind != "cuda":
raise RuntimeError("GpuFromCuda for non-cuda dest")
return """ return """
int %(name)serr; int %(name)serr;
gpudata *%(name)sdata; gpudata *%(name)sdata;
...@@ -258,8 +260,8 @@ class GpuFromCuda(Op): ...@@ -258,8 +260,8 @@ class GpuFromCuda(Op):
ssize_t *%(name)sstr; ssize_t *%(name)sstr;
cuCtxGetCurrent(&%(name)scur); cuCtxGetCurrent(&%(name)scur);
if (%(name)scur != cuda_get_ctx((void *)%(ctx)s)) { if (%(name)scur != cuda_get_ctx(GpuArray_default_context->ctx)) {
PyErr_SetString(PyErr_ValueError, "Ambient context is not the same as output context."); PyErr_SetString(PyErr_ValueError, "Ambient cuda context is not the same as output context.");
%(fail)s %(fail)s
} }
%(name)sdims = (size_t *)calloc(%(in)s->nd, sizeof(size_t)); %(name)sdims = (size_t *)calloc(%(in)s->nd, sizeof(size_t));
...@@ -286,7 +288,8 @@ class GpuFromCuda(Op): ...@@ -286,7 +288,8 @@ class GpuFromCuda(Op):
%(fail)s %(fail)s
} }
%(name)sdata = cuda_make_buf((void *)%(ctx)s, (CUdeviceptr)%(in)s->devdata, %(name)sdata = cuda_make_buf(GpuArray_default_context->ctx,
(CUdeviceptr)%(in)s->devdata,
(size_t)%(in)s->data_allocated); (size_t)%(in)s->data_allocated);
if (%(name)sdata == NULL) { if (%(name)sdata == NULL) {
Py_DECREF(%(out)s); Py_DECREF(%(out)s);
...@@ -295,7 +298,8 @@ class GpuFromCuda(Op): ...@@ -295,7 +298,8 @@ class GpuFromCuda(Op):
PyErr_SetString(PyExc_MemoryError, "Could not allocate gpudata structure."); PyErr_SetString(PyExc_MemoryError, "Could not allocate gpudata structure.");
%(fail)s %(fail)s
} }
%(name)serr = GpuArray_fromdata(&%(out)s->ga, compyte_get_ops("cuda"), %(name)serr = GpuArray_fromdata(&%(out)s->ga,
GpuArray_default_context->ops,
%(name)sdata, 0, GA_FLOAT, %(in)s->nd, %(name)sdata, 0, GA_FLOAT, %(in)s->nd,
%(name)sdims, %(name)sstr, 1); %(name)sdims, %(name)sstr, 1);
free(%(name)sdims); free(%(name)sdims);
...@@ -307,11 +311,10 @@ class GpuFromCuda(Op): ...@@ -307,11 +311,10 @@ class GpuFromCuda(Op):
} }
Py_INCREF(%(in)s); Py_INCREF(%(in)s);
%(out)s->base = %(in)s; %(out)s->base = %(in)s;
""" % {'name':name, 'ctx': hex(type.context), 'in': inputs[0], """ % {'name':name, 'in': inputs[0], 'out': outputs[0],
'out': outputs[0], 'fail': sub['fail']} 'fail': sub['fail']}
# Don't implement c_code_cache_version since we hardcode the ctx address
# in the code block and this will not work across processes
def c_code_cache_version(self):
return (0,)
gpu_from_cuda = GpuFromCuda() gpu_from_cuda = GpuFromCuda()
# This module serves to stuff global values (like kind and context)
kind = None
context = None
...@@ -20,7 +20,7 @@ def test_transfer(): ...@@ -20,7 +20,7 @@ def test_transfer():
g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g') g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')
av = numpy.asarray(numpy.random.rand(5, 4), dtype='float32') av = numpy.asarray(numpy.random.rand(5, 4), dtype='float32')
gv = gpuarray.array(av, kind=g.type.kind, context=g.type.context) gv = gpuarray.array(av)
f = theano.function([a], gpu_from_host(a)) f = theano.function([a], gpu_from_host(a))
fv = f(av) fv = f(av)
......
...@@ -15,21 +15,11 @@ except ImportError: ...@@ -15,21 +15,11 @@ except ImportError:
pass pass
class GpuArrayType(Type): class GpuArrayType(Type):
def __init__(self, dtype, broadcastable, kind=None, context=None, def __init__(self, dtype, broadcastable, name=None):
name=None):
import globals
if kind is None:
kind = globals.kind
if context is None:
context = globals.context
# In case this was not provided and no global value is available # In case this was not provided and no global value is available
if kind is None:
raise RuntimeError("pygpu is not initialized")
self.dtype = str(dtype) self.dtype = str(dtype)
self.broadcastable = tuple(bool(b) for b in broadcastable) self.broadcastable = tuple(bool(b) for b in broadcastable)
self.ndim = len(self.broadcastable) self.ndim = len(self.broadcastable)
self.kind = kind
self.context = context
self.name = name self.name = name
try: try:
self.typecode = gpuarray.dtype_to_typecode(self.dtype) self.typecode = gpuarray.dtype_to_typecode(self.dtype)
...@@ -42,10 +32,6 @@ class GpuArrayType(Type): ...@@ -42,10 +32,6 @@ class GpuArrayType(Type):
if not isinstance(data, gpuarray.GpuArray): if not isinstance(data, gpuarray.GpuArray):
raise TypeError("%s expected a GpuArray object." % self, raise TypeError("%s expected a GpuArray object." % self,
data, type(data)) data, type(data))
if self.kind != data.kind:
raise TypeError("kind of GpuArray does not match")
if self.context != data.context:
raise TypeError("context of GpuArray differs")
if self.typecode != data.typecode: if self.typecode != data.typecode:
raise TypeError("%s expected typecode %d (dtype %s), " raise TypeError("%s expected typecode %d (dtype %s), "
"got %d (dtype %s)." % "got %d (dtype %s)." %
...@@ -54,13 +40,11 @@ class GpuArrayType(Type): ...@@ -54,13 +40,11 @@ class GpuArrayType(Type):
# fallthrough to ndim check # fallthrough to ndim check
elif allow_downcast: elif allow_downcast:
data = gpuarray.array(data, dtype=self.typecode, copy=False, data = gpuarray.array(data, dtype=self.typecode, copy=False,
kind=self.kind, context=self.context,
ndmin=len(self.broadcastable)) ndmin=len(self.broadcastable))
else: else:
up_dtype = scalar.upcast(self.dtype, data.dtype) up_dtype = scalar.upcast(self.dtype, data.dtype)
if up_dtype == self.dtype: if up_dtype == self.dtype:
data = gpuarray.array(data, dtype=self.typecode, copy=False, data = gpuarray.array(data, dtype=self.typecode, copy=False)
kind=self.kind, context=self.context)
else: else:
raise TypeError("%s cannot store a value of dtype %s " raise TypeError("%s cannot store a value of dtype %s "
"without risking loss of precision." % "without risking loss of precision." %
...@@ -98,8 +82,7 @@ class GpuArrayType(Type): ...@@ -98,8 +82,7 @@ class GpuArrayType(Type):
return numpy.asarray(res).all() return numpy.asarray(res).all()
def value_zeros(self, shape): def value_zeros(self, shape):
return pygpu.gpuarray.zeros(shape, dtype=self.typecode, kind=self.kind, return pygpu.gpuarray.zeros(shape, dtype=self.typecode)
context=self.context)
def make_variable(self, name=None): def make_variable(self, name=None):
return self.Variable(self, name=name) return self.Variable(self, name=name)
...@@ -107,16 +90,13 @@ class GpuArrayType(Type): ...@@ -107,16 +90,13 @@ class GpuArrayType(Type):
def __eq__(self, other): def __eq__(self, other):
return (type(self) == type(other) and return (type(self) == type(other) and
self.typecode == other.typecode and self.typecode == other.typecode and
self.broadcastable == other.broadcastable and self.broadcastable == other.broadcastable)
self.kind == other.kind and
self.context == other.context)
def __hash__(self): def __hash__(self):
return (hash(self.typecode) ^ hash(self.broadcastable) ^ return (hash(self.typecode) ^ hash(self.broadcastable))
hash(self.kind) ^ hash(self.context))
def __str__(self): def __str__(self):
return "GpuArray[%s, %s]<%s>" % (self.kind, self.context, self.dtype) return "GpuArray[%s, %s]<%s>" % (self.dtype,)
def c_declare(self, name, sub): def c_declare(self, name, sub):
return "GpuArrayObject *%s;" % (name,) return "GpuArrayObject *%s;" % (name,)
...@@ -144,7 +124,7 @@ class GpuArrayType(Type): ...@@ -144,7 +124,7 @@ class GpuArrayType(Type):
""" % {'name': name, 'fail': sub['fail']} """ % {'name': name, 'fail': sub['fail']}
def c_cleanup(self, name, sub): def c_cleanup(self, name, sub):
return "" return "Py_XDECREF(%(name)s); %(name)s = NULL;" % {'name': name }
def c_sync(self, name, sub): def c_sync(self, name, sub):
return """ return """
...@@ -167,7 +147,7 @@ class GpuArrayType(Type): ...@@ -167,7 +147,7 @@ class GpuArrayType(Type):
return [pygpu.get_include()] return [pygpu.get_include()]
def c_code_cache_version(self): def c_code_cache_version(self):
return () # TODO: This is temporary return (0,)
class _operators(tensor.basic._tensor_py_operators): class _operators(tensor.basic._tensor_py_operators):
...@@ -229,15 +209,14 @@ GpuArrayType.SharedVariable = GpuArraySharedVariable ...@@ -229,15 +209,14 @@ GpuArrayType.SharedVariable = GpuArraySharedVariable
def gpuarray_shared_constructor(value, name=None, strict=False, def gpuarray_shared_constructor(value, name=None, strict=False,
allow_downcast=None, borrow=False, allow_downcast=None, borrow=False,
broadcastable=None, kind=None, context=None): broadcastable=None):
"""SharedVariable constructor for GpuArrayType""" """SharedVariable constructor for GpuArrayType"""
if not isinstance(value, (numpy.ndarray, pygpu.gpuarray.GpuArray)): if not isinstance(value, (numpy.ndarray, pygpu.gpuarray.GpuArray)):
raise TypeError('ndarray or GpuArray required') raise TypeError('ndarray or GpuArray required')
if broadcastable is None: if broadcastable is None:
broadcastable = (False,) * value.ndim broadcastable = (False,) * value.ndim
type = GpuArrayType(value.dtype, broadcastable, kind=kind, context=context) type = GpuArrayType(value.dtype, broadcastable)
deviceval = pygpu.gpuarray.array(value, copy=(not borrow), kind=type.kind, deviceval = pygpu.gpuarray.array(value, copy=(not borrow))
context=type.context)
return GpuArraySharedVariable(type=type, value=deviceval, name=name, return GpuArraySharedVariable(type=type, value=deviceval, name=name,
strict=strict) strict=strict)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论