提交 01b993dd authored 作者: Arnaud Bergeron

Update to the new API and use the default context.

上级 dc5d54fb
......@@ -32,21 +32,8 @@ from type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
def init_dev(dev):
import globals
if dev.startswith('cuda'):
# format is cuda<devnum>
globals.kind = 'cuda'
devnum = int(dev[4:])
elif dev.startswith('opencl'):
# format is opencl<platnum>:<devnum>
globals.kind = 'opencl'
devspec = dev[6:]
plat, dev = devspec.split(':')
devnum = int(dev)|(int(plat)<<16)
else:
globals.kind = None
if globals.kind:
globals.context = pygpu.gpuarray.init(globals.kind, devnum)
context = pygpu.init(dev)
pygpu.set_default_context(context)
if pygpu:
try:
......
......@@ -20,8 +20,8 @@ def as_gpuarray_variable(x):
return gpu_from_host(tensor_x)
def as_gpuarray(x, kind, context):
return gpuarray.array(x, kind=kind, context=context, copy=False)
def as_gpuarray(x):
return gpuarray.array(x, copy=False)
class HostFromGpu(Op):
......@@ -76,7 +76,7 @@ class HostFromGpu(Op):
%(name)s_ga);
if (%(name)s_ga == &%(name)s_ga_s) GpuArray_clear(%(name)s_ga);
if (%(name)serr != GA_NO_ERROR) {
PyErr_SetSring(PyExc_RuntimeError, "Could not read device data.");
PyErr_SetString(PyExc_RuntimeError, "Could not read device data.");
%(fail)s
}
""" % {'name': name, 'fail': sub['fail'], 'inp': inputs[0],
......@@ -120,7 +120,7 @@ class GpuFromHost(Op):
x, = inp
z, = out
type = node.outputs[0].type
z[0] = gpuarray.array(x, kind=type.kind, context=type.context)
z[0] = gpuarray.array(x)
def grad(self, inputs, grads):
gz, = grads
......@@ -141,17 +141,23 @@ class GpuFromHost(Op):
return """
PyArrayObject *%(name)s_tmp;
int %(name)serr;
if ((PyObject *)GpuArray_default_context == Py_None) {
PyErr_SetString(PyExc_ValueError, "No default context, gpuarray not initialized?");
%(fail)s
}
%(name)s_tmp = PyArray_GETCONTIGUOUS(%(inp)s);
if (%(name)s_tmp == NULL) {
%(fail)s
}
%(out)s = new_GpuArray((PyObject *)&GpuArrayType);
%(out)s = new_GpuArray((PyObject *)&GpuArrayType, GpuArray_default_context);
if (%(out)s == NULL) {
Py_DECREF(%(name)s_tmp);
%(fail)s
}
%(name)serr = GpuArray_empty(&%(out)s->ga, compyte_get_ops("%(kind)s"),
(void *)%(ctx)s, %(typecode)s,
%(name)serr = GpuArray_empty(&%(out)s->ga,
GpuArray_default_context->ops,
GpuArray_default_context->ctx,
%(typecode)s,
PyArray_NDIM(%(inp)s),
(size_t *)PyArray_DIMS(%(inp)s),
GA_C_ORDER);
......@@ -170,12 +176,12 @@ class GpuFromHost(Op):
PyErr_SetString(PyExc_RuntimeError, "Could not copy array data to device");
%(fail)s
}
""" % {'name': name, 'kind': type.kind, 'ctx': hex(type.context),
'inp': inputs[0], 'out': outputs[0], 'fail': sub['fail'],
""" % {'name': name, 'inp': inputs[0],
'out': outputs[0], 'fail': sub['fail'],
'typecode': type.typecode}
# Don't implement c_code_cache_version since we harcode the ctx address
# in the code block and this will not work across processes
def c_code_cache_version(self):
return (0,)
gpu_from_host = GpuFromHost()
......@@ -197,8 +203,7 @@ class GpuFromCuda(Op):
def perform(self, node, inp, out):
x, = inp
z, = out
z[0] = gpuarray.array(numpy.asarray(x), kind=globals.kind,
context=globals.context)
z[0] = gpuarray.array(numpy.asarray(x))
def grad(self, inputs, grads):
gz, = grads
......@@ -247,9 +252,6 @@ class GpuFromCuda(Op):
"""
def c_code(self, node, name, input, output, sub):
type = node.outputs[0].type
if type.kind != "cuda":
raise RuntimeError("GpuFromCuda for non-cuda dest")
return """
int %(name)serr;
gpudata *%(name)sdata;
......@@ -258,8 +260,8 @@ class GpuFromCuda(Op):
ssize_t *%(name)sstr;
cuCtxGetCurrent(&%(name)scur);
if (%(name)scur != cuda_get_ctx((void *)%(ctx)s)) {
PyErr_SetString(PyErr_ValueError, "Ambient context is not the same as output context.");
if (%(name)scur != cuda_get_ctx(GpuArray_default_context->ctx)) {
PyErr_SetString(PyErr_ValueError, "Ambient cuda context is not the same as output context.");
%(fail)s
}
%(name)sdims = (size_t *)calloc(%(in)s->nd, sizeof(size_t));
......@@ -286,7 +288,8 @@ class GpuFromCuda(Op):
%(fail)s
}
%(name)sdata = cuda_make_buf((void *)%(ctx)s, (CUdeviceptr)%(in)s->devdata,
%(name)sdata = cuda_make_buf(GpuArray_default_context->ctx,
(CUdeviceptr)%(in)s->devdata,
(size_t)%(in)s->data_allocated);
if (%(name)sdata == NULL) {
Py_DECREF(%(out)s);
......@@ -295,7 +298,8 @@ class GpuFromCuda(Op):
PyErr_SetString(PyExc_MemoryError, "Could not allocate gpudata structure.");
%(fail)s
}
%(name)serr = GpuArray_fromdata(&%(out)s->ga, compyte_get_ops("cuda"),
%(name)serr = GpuArray_fromdata(&%(out)s->ga,
GpuArray_default_context->ops,
%(name)sdata, 0, GA_FLOAT, %(in)s->nd,
%(name)sdims, %(name)sstr, 1);
free(%(name)sdims);
......@@ -307,11 +311,10 @@ class GpuFromCuda(Op):
}
Py_INCREF(%(in)s);
%(out)s->base = %(in)s;
""" % {'name':name, 'ctx': hex(type.context), 'in': inputs[0],
'out': outputs[0], 'fail': sub['fail']}
# Don't implement c_code_cache_version since we harcode the ctx address
# in the code block and this will not work across processes
""" % {'name':name, 'in': inputs[0], 'out': outputs[0],
'fail': sub['fail']}
def c_code_cache_version(self):
return (0,)
gpu_from_cuda = GpuFromCuda()
# This modules serves to stuff global values (like kind and context)
kind = None
context = None
......@@ -20,7 +20,7 @@ def test_transfer():
g = GpuArrayType(dtype='float32', broadcastable=(False, False))('g')
av = numpy.asarray(numpy.random.rand(5, 4), dtype='float32')
gv = gpuarray.array(av, kind=g.type.kind, context=g.type.context)
gv = gpuarray.array(av)
f = theano.function([a], gpu_from_host(a))
fv = f(av)
......
......@@ -15,21 +15,11 @@ except ImportError:
pass
class GpuArrayType(Type):
def __init__(self, dtype, broadcastable, kind=None, context=None,
name=None):
import globals
if kind is None:
kind = globals.kind
if context is None:
context = globals.context
def __init__(self, dtype, broadcastable, name=None):
# In case this was not provided and no global value is available
if kind is None:
raise RuntimeError("pygpu is not initialized")
self.dtype = str(dtype)
self.broadcastable = tuple(bool(b) for b in broadcastable)
self.ndim = len(self.broadcastable)
self.kind = kind
self.context = context
self.name = name
try:
self.typecode = gpuarray.dtype_to_typecode(self.dtype)
......@@ -42,10 +32,6 @@ class GpuArrayType(Type):
if not isinstance(data, gpuarray.GpuArray):
raise TypeError("%s expected a GpuArray object." % self,
data, type(data))
if self.kind != data.kind:
raise TypeError("kind of GpuArray does not match")
if self.context != data.context:
raise TypeError("context of GpuArray differs")
if self.typecode != data.typecode:
raise TypeError("%s expected typecode %d (dtype %s), "
"got %d (dtype %s)." %
......@@ -54,13 +40,11 @@ class GpuArrayType(Type):
# fallthrough to ndim check
elif allow_downcast:
data = gpuarray.array(data, dtype=self.typecode, copy=False,
kind=self.kind, context=self.context,
ndmin=len(self.broadcastable))
else:
up_dtype = scalar.upcast(self.dtype, data.dtype)
if up_dtype == self.dtype:
data = gpuarray.array(data, dtype=self.typecode, copy=False,
kind=self.kind, context=self.context)
data = gpuarray.array(data, dtype=self.typecode, copy=False)
else:
raise TypeError("%s cannot store a value of dtype %s "
"without risking loss of precision." %
......@@ -98,8 +82,7 @@ class GpuArrayType(Type):
return numpy.asarray(res).all()
def value_zeros(self, shape):
return pygpu.gpuarray.zeros(shape, dtype=self.typecode, kind=self.kind,
context=self.context)
return pygpu.gpuarray.zeros(shape, dtype=self.typecode)
def make_variable(self, name=None):
return self.Variable(self, name=name)
......@@ -107,16 +90,13 @@ class GpuArrayType(Type):
def __eq__(self, other):
return (type(self) == type(other) and
self.typecode == other.typecode and
self.broadcastable == other.broadcastable and
self.kind == other.kind and
self.context == other.context)
self.broadcastable == other.broadcastable)
def __hash__(self):
return (hash(self.typecode) ^ hash(self.broadcastable) ^
hash(self.kind) ^ hash(self.context))
return (hash(self.typecode) ^ hash(self.broadcastable))
def __str__(self):
return "GpuArray[%s, %s]<%s>" % (self.kind, self.context, self.dtype)
return "GpuArray[%s, %s]<%s>" % (self.dtype,)
def c_declare(self, name, sub):
return "GpuArrayObject *%s;" % (name,)
......@@ -144,7 +124,7 @@ class GpuArrayType(Type):
""" % {'name': name, 'fail': sub['fail']}
def c_cleanup(self, name, sub):
return ""
return "Py_XDECREF(%(name)s); %(name)s = NULL;" % {'name': name }
def c_sync(self, name, sub):
return """
......@@ -167,7 +147,7 @@ class GpuArrayType(Type):
return [pygpu.get_include()]
def c_code_cache_version(self):
return () # TODO: This is temporary
return (0,)
class _operators(tensor.basic._tensor_py_operators):
......@@ -229,15 +209,14 @@ GpuArrayType.SharedVariable = GpuArraySharedVariable
def gpuarray_shared_constructor(value, name=None, strict=False,
allow_downcast=None, borrow=False,
broadcastable=None, kind=None, context=None):
broadcastable=None):
"""SharedVariable constructor for GpuArrayType"""
if not isinstance(value, (numpy.ndarray, pygpu.gpuarray.GpuArray)):
raise TypeError('ndarray or GpuArray required')
if broadcastable is None:
broadcastable = (False,) * value.ndim
type = GpuArrayType(value.dtype, broadcastable, kind=kind, context=context)
deviceval = pygpu.gpuarray.array(value, copy=(not borrow), kind=type.kind,
context=type.context)
type = GpuArrayType(value.dtype, broadcastable)
deviceval = pygpu.gpuarray.array(value, copy=(not borrow))
return GpuArraySharedVariable(type=type, value=deviceval, name=name,
strict=strict)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论