Commit 618b2d94 authored by Arnaud Bergeron

Remove usage of new_GpuArray() in favor of the new pygpu_<> API.

Parent 33fae103
......@@ -298,14 +298,6 @@ class GpuFromCuda(Op):
%(name)sstr[i] = (ssize_t)CudaNdarray_HOST_STRIDES(%(in)s)[i]*4;
}
Py_XDECREF(%(out)s);
%(out)s = new_GpuArray((PyObject *)&PyGpuArrayType, pygpu_default_context(), Py_None);
if (%(out)s == NULL) {
free(%(name)sdims);
free(%(name)sstr);
%(fail)s
}
%(name)sdata = cuda_make_buf(pygpu_default_context()->ctx,
(CUdeviceptr)%(in)s->devdata,
((size_t)%(in)s->data_allocated)*4);
......@@ -316,24 +308,23 @@ class GpuFromCuda(Op):
PyErr_SetString(PyExc_MemoryError, "Could not allocate gpudata structure.");
%(fail)s
}
%(name)serr = GpuArray_fromdata(&%(out)s->ga,
pygpu_default_context()->ops,
%(name)sdata, 0, GA_FLOAT, %(in)s->nd,
%(name)sdims, %(name)sstr, 1);
Py_XDECREF(%(out)s);
%(out)s = pygpu_fromgpudata(%(name)sdata, 0, GA_FLOAT, %(in)s->nd,
%(name)sdims, %(name)sstr,
pygpu_default_context(), 1,
(PyObject *)%(in)s,
(PyObject *)&GpuArrayType);
pygpu_default_context()->ops->buffer_release(%(name)sdata);
free(%(name)sdims);
free(%(name)sstr);
if (%(name)serr != GA_NO_ERROR) {
Py_DECREF(%(out)s);
PyErr_SetString(PyExc_MemoryError, "Could not allocate GpuArray structure.");
if (%(out)s == NULL) {
%(fail)s
}
Py_INCREF(%(in)s);
%(out)s->base = (PyObject *)%(in)s;
""" % {'name':name, 'in': inputs[0], 'out': outputs[0],
'fail': sub['fail']}
def c_code_cache_version(self):
return (3,)
return (4,)
gpu_from_cuda = GpuFromCuda()
......
......@@ -215,34 +215,23 @@ class GpuDimShuffle(HideC, DimShuffle):
if d in self.augment:
stmts.append("sh[%s] = 1;" % (d,))
else:
stmts.append("sh[%s] = tmp.dimensions[%s];" % (d, e))
stmts.append("sh[%s] = tmp->ga.dimensions[%s];" % (d, e))
e += 1
return '\n '.join(stmts)
return """
static const unsigned int %(name)s_ax[] = {%(shuffle)s};
static int %(name)s_f(PyGpuArrayObject *res, PyGpuArrayObject *a) {
GpuArray tmp;
static PyGpuArrayObject *%(name)s_f(PyGpuArrayObject *a) {
PyGpuArrayObject *res, *tmp;
size_t sh[%(nd_out)s];
unsigned int i;
int err;
err = GpuArray_transpose(&tmp, &a->ga, %(name)s_ax);
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "error in _transpose call");
return -1;
}
tmp = pygpu_transpose(a, %(name)s_ax);
if (!tmp) return NULL;
%(copy_shape)s
err = GpuArray_reshape(&res->ga, &tmp, %(nd_out)s, sh,
GA_ANY_ORDER, 1);
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "error in _reshape call");
return -1;
}
GpuArray_clear(&tmp);
return 0;
res = pygpu_reshape(tmp, %(nd_out)s, sh, GA_ANY_ORDER, 1);
Py_DECREF(tmp);
return res;
}
""" % dict(shuffle=', '.join(str(a) for a in (self.shuffle+self.drop)),
name=name, nd_out=len(self.new_order),
......@@ -252,38 +241,31 @@ class GpuDimShuffle(HideC, DimShuffle):
d = dict(name=name, fail=sub['fail'], inp=inputs[0], out=outputs[0],
nd=len(self.input_broadcastable))
process = """
PyGpuArrayObject *tmp = NULL;
if (%(inp)s->ga.nd != %(nd)s) {
PyErr_SetString(PyExc_TypeError, "input nd");
%(fail)s
}
Py_XDECREF(%(out)s);
%(out)s = new_GpuArray((PyObject *)&PyGpuArrayType, pygpu_default_context(), Py_None);
%(out)s = %(name)s_f(%(inp)s);
if (%(out)s == NULL) {%(fail)s}
if (%(name)s_f(%(out)s, %(inp)s)) {
%(fail)s
}
""" % d
if not self.inplace:
process += """
if (%(out)s->ga.data == %(inp)s->ga.data) {
PyObject *%(name)s_tmp;
%(name)s_tmp = PyObject_CallMethod((PyObject *)%(out)s, "copy", NULL);
if (%(name)s_tmp == NULL) { %(fail)s }
if (!PyObject_IsInstance(%(name)s_tmp, (PyObject *)&PyGpuArrayType)) {
PyErr_SetString(PyExc_TypeError, "not a GpuArray out of the copy");
%(fail)s
}
Py_DECREF(%(out)s);
%(out)s = (PyGpuArrayObject *)%(name)s_tmp;
tmp = pygpu_copy(%(out)s, GA_ANY_ORDER);
Py_DECREF(%(out)s);
if (!tmp) {
%(out)s = NULL;
%(fail)s
}
%(out)s = tmp;
""" % d
return process
def c_code_cache_version(self):
return (2,)
return (3,)
class GpuCAReduce(HideC, CAReduceDtype):
......
......@@ -78,6 +78,15 @@ class GpuSubtensor(HideC, Subtensor):
idx_list = self.idx_list + \
((slice(None),) * (inp_ndim - len(self.idx_list)))
# This case fails when we use pygpu_index(), so here is some
# special code
if len(idx_list) == 0:
return """
Py_XDECREF(%(out)s);
%(out)s = pygpu_copy(%(inp)s, GA_ANY_ORDER);
if (!%(out)s) { %(fail)s }
""" % dict(out=outputs[0], inp=inp, fail=sub['fail'])
sio = StringIO.StringIO()
print >> sio, """
ssize_t starts[%(sz)s];
......@@ -136,19 +145,11 @@ class GpuSubtensor(HideC, Subtensor):
print >>sio, """
Py_XDECREF(%(out)s);
%(out)s = new_GpuArray((PyObject *)&PyGpuArrayType, pygpu_default_context(), Py_None);
%(out)s = pygpu_index(%(inp)s, starts, stops, steps);
if (!%(out)s) { %(fail)s }
if ((err = GpuArray_index(&%(out)s->ga, &%(inp)s->ga, starts, stops, steps))) {
Py_DECREF(%(out)s); %(out)s = NULL;
if (err == GA_VALUE_ERROR)
PyErr_SetString(PyExc_IndexError, "index out of bounds");
else
PyErr_SetString(PyExc_RuntimeError, "index failed");
%(fail)s
}
""" % dict(name=name, fail=sub['fail'], inp=inp, out=outputs[0])
return sio.getvalue()
def c_code_cache_version(self):
return (3,)
return (4,)
......@@ -36,6 +36,7 @@ class test_GpuDimShuffle(test_DimShuffle):
class test_GpuCAReduce(test_CAReduce):
dtypes = ["float32"]
bin_dtypes = ["uint8", "int8"]
op = GpuCAReduce
reds = [scalar.add, scalar.mul]
......
......@@ -277,12 +277,6 @@ theano.compile.register_view_op_c_code(GpuArrayType, """
theano.compile.register_deep_copy_op_c_code(GpuArrayType, """
Py_XDECREF(%(oname)s);
%(oname)s = new_GpuArray((PyObject *)&PyGpuArrayType, pygpu_default_context(), Py_None);
%(oname)s = pygpu_copy(%(iname)s, GA_ANY_ORDER);
if (!%(oname)s) { %(fail)s }
int %(iname)s_err;
%(iname)s_err = GpuArray_copy(&%(oname)s->ga, &%(iname)s->ga, GA_ANY_ORDER);
if (%(iname)s_err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Error during copy");
%(fail)s
}
""", version=(4,))
""", version=(5,))
Markdown is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Finish editing this comment first!
Register or sign in to comment