提交 b895c6e8 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #4542 from abergeron/api_changes

libgpuarray api changes
...@@ -42,7 +42,7 @@ register_transfer(transfer) ...@@ -42,7 +42,7 @@ register_transfer(transfer)
def init_dev(dev, name=None): def init_dev(dev, name=None):
v = pygpu.gpuarray.api_version() v = pygpu.gpuarray.api_version()
expected = -9998 expected = -9997
if v[0] != expected: if v[0] != expected:
raise RuntimeError("Wrong major API version for gpuarray:", v[0], raise RuntimeError("Wrong major API version for gpuarray:", v[0],
"Make sure Theano and libgpuarray/pygpu " "Make sure Theano and libgpuarray/pygpu "
......
...@@ -259,14 +259,14 @@ class GpuKernelBase(object): ...@@ -259,14 +259,14 @@ class GpuKernelBase(object):
int types[%(numargs)u] = {%(types)s}; int types[%(numargs)u] = {%(types)s};
const char *bcode = %(bvar)s; const char *bcode = %(bvar)s;
size_t sz = sizeof(%(bvar)s); size_t sz = sizeof(%(bvar)s);
if (GpuKernel_init(&%(ovar)s, %(ctx)s->ops, %(ctx)s->ctx, 1, &bcode, &sz, if (GpuKernel_init(&%(ovar)s, %(ctx)s->ctx, 1, &bcode, &sz,
"%(kname)s", %(numargs)u, types, GA_USE_BINARY, NULL) "%(kname)s", %(numargs)u, types, GA_USE_BINARY, NULL)
!= GA_NO_ERROR) { != GA_NO_ERROR) {
if ((err = GpuKernel_init(&%(ovar)s, %(ctx)s->ops, %(ctx)s->ctx, 1, if ((err = GpuKernel_init(&%(ovar)s, %(ctx)s->ctx, 1,
&%(cname)s, NULL, "%(kname)s", %(numargs)u, &%(cname)s, NULL, "%(kname)s", %(numargs)u,
types, %(flags)s, NULL)) != GA_NO_ERROR) { types, %(flags)s, NULL)) != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuKernel_init error %%d: %%s", PyErr_Format(PyExc_RuntimeError, "GpuKernel_init error %%d: %%s",
err, Gpu_error(%(ctx)s->ops, %(ctx)s->ctx, err)); err, gpucontext_error(%(ctx)s->ctx, err));
%(fail)s %(fail)s
} }
} }
...@@ -310,7 +310,7 @@ class GpuKernelBase(object): ...@@ -310,7 +310,7 @@ class GpuKernelBase(object):
The node that we need the cache version for. The node that we need the cache version for.
""" """
return (3, self.get_params(node).bin_id) return (4, self.get_params(node).bin_id)
class HostFromGpu(Op): class HostFromGpu(Op):
......
...@@ -24,16 +24,9 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W, ...@@ -24,16 +24,9 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W,
size_t *offW = NULL; size_t *offW = NULL;
size_t *offInp = NULL; size_t *offInp = NULL;
size_t *offOut = NULL; size_t *offOut = NULL;
gpuarray_blas_ops *blas_ops;
int err; int err;
err = ctx->ops->property(ctx->ctx, NULL, NULL, err = gpublas_setup(ctx->ctx);
GA_CTX_PROP_BLAS_OPS, &blas_ops);
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't get blas ops");
return -1;
}
err = blas_ops->setup(ctx->ctx);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't setup blas"); PyErr_SetString(PyExc_RuntimeError, "Can't setup blas");
return -1; return -1;
...@@ -93,29 +86,29 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W, ...@@ -93,29 +86,29 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W,
} }
if (out->ga.typecode == GA_FLOAT) { if (out->ga.typecode == GA_FLOAT) {
err = blas_ops->sgemvBatch(cb_fortran, transA, err = gpublas_sgemvBatch(cb_fortran, transA,
PyGpuArray_DIMS(out)[2], PyGpuArray_DIMS(out)[2],
PyGpuArray_DIMS(h)[2], 1, PyGpuArray_DIMS(h)[2], 1,
W_list, offW, lda, W_list, offW, lda,
inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode), inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode),
1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode), 1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode),
PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0); PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0);
} else if (out->ga.typecode == GA_DOUBLE) { } else if (out->ga.typecode == GA_DOUBLE) {
err = blas_ops->dgemvBatch(cb_fortran, transA, err = gpublas_dgemvBatch(cb_fortran, transA,
PyGpuArray_DIMS(out)[2], PyGpuArray_DIMS(out)[2],
PyGpuArray_DIMS(h)[2], 1, PyGpuArray_DIMS(h)[2], 1,
W_list, offW, lda, W_list, offW, lda,
inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode), inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode),
1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode), 1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode),
PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0); PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0);
} else if (out->ga.typecode == GA_HALF) { } else if (out->ga.typecode == GA_HALF) {
err = blas_ops->sgemvBatch(cb_fortran, transA, err = gpublas_sgemvBatch(cb_fortran, transA,
PyGpuArray_DIMS(out)[2], PyGpuArray_DIMS(out)[2],
PyGpuArray_DIMS(h)[2], 1, PyGpuArray_DIMS(h)[2], 1,
W_list, offW, lda, W_list, offW, lda,
inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode), inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode),
1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode), 1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode),
PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0); PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0);
} else { } else {
err = GA_INVALID_ERROR; err = GA_INVALID_ERROR;
} }
......
...@@ -12,16 +12,9 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x, ...@@ -12,16 +12,9 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x,
size_t *offOut = NULL; size_t *offOut = NULL;
size_t *offX = NULL; size_t *offX = NULL;
size_t *offY = NULL; size_t *offY = NULL;
gpuarray_blas_ops *blas_ops;
int err; int err;
err = ctx->ops->property(ctx->ctx, NULL, NULL, err = gpublas_setup(ctx->ctx);
GA_CTX_PROP_BLAS_OPS, &blas_ops);
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't get blas ops");
return -1;
}
err = blas_ops->setup(ctx->ctx);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't setup blas"); PyErr_SetString(PyExc_RuntimeError, "Can't setup blas");
return -1; return -1;
...@@ -84,26 +77,26 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x, ...@@ -84,26 +77,26 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x,
ssize_t str_out = PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode); ssize_t str_out = PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode);
if (out->ga.typecode == GA_FLOAT) { if (out->ga.typecode == GA_FLOAT) {
err = blas_ops->sgerBatch(cb_fortran, err = gpublas_sgerBatch(cb_fortran,
PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2], PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2],
*(float *)PyArray_GETPTR1(alpha, 0), *(float *)PyArray_GETPTR1(alpha, 0),
y_list, offY, str_y, x_list, offX, str_x, y_list, offY, str_y, x_list, offX, str_x,
o_list, offOut, str_out, o_list, offOut, str_out,
PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0); PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0);
} else if (out->ga.typecode == GA_DOUBLE) { } else if (out->ga.typecode == GA_DOUBLE) {
err = blas_ops->dgerBatch(cb_fortran, err = gpublas_dgerBatch(cb_fortran,
PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2], PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2],
*(double *)PyArray_GETPTR1(alpha, 0), *(double *)PyArray_GETPTR1(alpha, 0),
y_list, offY, str_y, x_list, offX, str_x, y_list, offY, str_y, x_list, offX, str_x,
o_list, offOut, str_out, o_list, offOut, str_out,
PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0); PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0);
} else if (out->ga.typecode == GA_HALF) { } else if (out->ga.typecode == GA_HALF) {
err = blas_ops->hgerBatch(cb_fortran, err = gpublas_hgerBatch(cb_fortran,
PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2], PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2],
*(float *)PyArray_GETPTR1(alpha, 0), *(float *)PyArray_GETPTR1(alpha, 0),
y_list, offY, str_y, x_list, offX, str_x, y_list, offY, str_y, x_list, offX, str_x,
o_list, offOut, str_out, o_list, offOut, str_out,
PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0); PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0);
} else { } else {
err = GA_INVALID_ERROR; err = GA_INVALID_ERROR;
} }
......
...@@ -125,7 +125,7 @@ def dnn_available(context_name): ...@@ -125,7 +125,7 @@ def dnn_available(context_name):
ctx = get_context(context_name) ctx = get_context(context_name)
if not ctx.kind == 'cuda': if not ctx.kind == b'cuda':
dnn_available.msg = "Not on a CUDA device." dnn_available.msg = "Not on a CUDA device."
return False return False
......
...@@ -105,7 +105,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -105,7 +105,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
algo = choice.algo; algo = choice.algo;
#else #else
size_t free; size_t free;
int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free); int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) { if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the " PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
...@@ -234,7 +234,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -234,7 +234,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
* to place a nice get_work_mem() function in. * to place a nice get_work_mem() function in.
*/ */
if (worksize != 0) { if (worksize != 0) {
workspace = c->ops->buffer_alloc(c->ctx, worksize, NULL, 0, NULL); workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) { if (workspace == NULL) {
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
"Could not allocate working memory"); "Could not allocate working memory");
...@@ -258,7 +258,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -258,7 +258,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output)); APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output));
if (worksize != 0) if (worksize != 0)
c->ops->buffer_release(workspace); gpudata_release(workspace);
cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
......
...@@ -106,7 +106,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -106,7 +106,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
algo = choice.algo; algo = choice.algo;
#else #else
size_t free; size_t free;
int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free); int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) { if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the " PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
...@@ -204,7 +204,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -204,7 +204,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
} }
if (worksize != 0) { if (worksize != 0) {
workspace = c->ops->buffer_alloc(c->ctx, worksize, NULL, 0, NULL); workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) { if (workspace == NULL) {
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
"Could not allocate working memory"); "Could not allocate working memory");
...@@ -227,7 +227,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -227,7 +227,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(*input)); APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(*input));
if (worksize != 0) if (worksize != 0)
c->ops->buffer_release(workspace); gpudata_release(workspace);
cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_record(output->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_record(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
......
...@@ -107,7 +107,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -107,7 +107,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
algo = choice.algo; algo = choice.algo;
#else #else
size_t free; size_t free;
int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free); int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) { if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the " PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
...@@ -192,7 +192,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -192,7 +192,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
} }
if (worksize != 0) { if (worksize != 0) {
workspace = c->ops->buffer_alloc(c->ctx, worksize, NULL, 0, NULL); workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) { if (workspace == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Could not allocate working memory"); PyErr_SetString(PyExc_RuntimeError, "Could not allocate working memory");
cuda_exit(c->ctx); cuda_exit(c->ctx);
...@@ -214,7 +214,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -214,7 +214,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(*kerns)); APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(*kerns));
if (worksize != 0) if (worksize != 0)
c->ops->buffer_release(workspace); gpudata_release(workspace);
cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_record(output->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_record(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
......
...@@ -199,7 +199,7 @@ class GpuElemwise(HideC, Elemwise): ...@@ -199,7 +199,7 @@ class GpuElemwise(HideC, Elemwise):
typecode=o.type.typecode) typecode=o.type.typecode)
res += """ res += """
ge = GpuElemwise_new(%(ctx)s->ops, %(ctx)s->ctx, %(support)s, %(kop)s, %(nargs)s, args, %(nd)s, 0); ge = GpuElemwise_new(%(ctx)s->ctx, %(support)s, %(kop)s, %(nargs)s, args, %(nd)s, 0);
if (ge == NULL) { if (ge == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Could not initialize elemwise support"); PyErr_SetString(PyExc_RuntimeError, "Could not initialize elemwise support");
%(fail)s %(fail)s
...@@ -360,7 +360,7 @@ class GpuElemwise(HideC, Elemwise): ...@@ -360,7 +360,7 @@ class GpuElemwise(HideC, Elemwise):
def c_code_cache_version(self): def c_code_cache_version(self):
ver = self.scalar_op.c_code_cache_version() ver = self.scalar_op.c_code_cache_version()
if ver: if ver:
return (6, ver) return (7, ver)
else: else:
return ver return ver
...@@ -554,7 +554,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -554,7 +554,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
def make_node(self, x): def make_node(self, x):
x = as_gpuarray_variable(x, infer_context_name(x)) x = as_gpuarray_variable(x, infer_context_name(x))
if x.type.context.kind != 'cuda': if x.type.context.kind != b'cuda':
raise TypeError("GpuCAReduceCuda doesn't work for non-cuda devices") raise TypeError("GpuCAReduceCuda doesn't work for non-cuda devices")
ret = super(GpuCAReduceCuda, self).make_node(x) ret = super(GpuCAReduceCuda, self).make_node(x)
self = copy.copy(self) self = copy.copy(self)
......
...@@ -26,11 +26,8 @@ class GpuCumsum(GpuKernelBase, Op): ...@@ -26,11 +26,8 @@ class GpuCumsum(GpuKernelBase, Op):
def __init__(self, axis): def __init__(self, axis):
self.axis = axis self.axis = axis
def __str__(self): def c_code_cache_version(self):
return "%s{%s}" % (self.__class__.__name__, self.axis) return (3,)
def c_code_cache_version_apply(self, node):
return (1,)
def c_headers(self): def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray/types.h>', '<gpuarray_helper.h>'] return ['<numpy_compat.h>', '<gpuarray/types.h>', '<gpuarray_helper.h>']
...@@ -221,7 +218,7 @@ class GpuCumsum(GpuKernelBase, Op): ...@@ -221,7 +218,7 @@ class GpuCumsum(GpuKernelBase, Op):
return kernels return kernels
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
if node.inputs[0].type.context.kind != 'cuda': if node.inputs[0].type.context.kind != b'cuda':
raise NotImplementedError("cuda only") raise NotImplementedError("cuda only")
x, = inp x, = inp
z, = out z, = out
...@@ -249,17 +246,17 @@ class GpuCumsum(GpuKernelBase, Op): ...@@ -249,17 +246,17 @@ class GpuCumsum(GpuKernelBase, Op):
size_t max_grid_size1; size_t max_grid_size1;
size_t max_grid_size2; size_t max_grid_size2;
int err; int err;
err = %(ctx)s->ops->property(%(ctx)s->ctx, NULL, NULL, GA_CTX_PROP_MAXLSIZE0, &max_threads_dim0); err = gpucontext_property(%(ctx)s->ctx, GA_CTX_PROP_MAXLSIZE0, &max_threads_dim0);
if (err != GA_NO_ERROR){ if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_threads_dims0"); PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_threads_dims0");
%(fail)s; %(fail)s;
} }
err = %(ctx)s->ops->property(%(ctx)s->ctx, NULL, NULL, GA_CTX_PROP_MAXGSIZE1, &max_grid_size1); err = gpucontext_property(%(ctx)s->ctx, GA_CTX_PROP_MAXGSIZE1, &max_grid_size1);
if (err != GA_NO_ERROR){ if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size1"); PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size1");
%(fail)s; %(fail)s;
} }
err = %(ctx)s->ops->property(%(ctx)s->ctx, NULL, NULL, GA_CTX_PROP_MAXGSIZE2, &max_grid_size2); err = gpucontext_property(%(ctx)s->ctx, GA_CTX_PROP_MAXGSIZE2, &max_grid_size2);
if (err != GA_NO_ERROR){ if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size2"); PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size2");
%(fail)s; %(fail)s;
......
...@@ -117,7 +117,7 @@ int gemm16(PyGpuArrayObject *C, float alpha, ...@@ -117,7 +117,7 @@ int gemm16(PyGpuArrayObject *C, float alpha,
if (48 < n128 && n128 <= 64) { if (48 < n128 && n128 <= 64) {
n64 = n / 64; n64 = n / 64;
if (nprocs == 0) if (nprocs == 0)
if (A->ga.ops->property(A->context->ctx, NULL, NULL, if (gpucontext_property(A->context->ctx,
GA_CTX_PROP_NUMPROCS, &nprocs)) { GA_CTX_PROP_NUMPROCS, &nprocs)) {
nprocs = 0; nprocs = 0;
res = 1; res = 1;
......
...@@ -243,7 +243,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -243,7 +243,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
return kernels return kernels
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
if node.inputs[0].type.context.kind != 'cuda': if node.inputs[0].type.context.kind != b'cuda':
raise NotImplementedError("cuda only") raise NotImplementedError("cuda only")
dtype_ten4 = node.inputs[0].dtype dtype_ten4 = node.inputs[0].dtype
dtype_neib_shape = node.inputs[1].dtype dtype_neib_shape = node.inputs[1].dtype
......
...@@ -105,7 +105,7 @@ class Gemm16(COp): ...@@ -105,7 +105,7 @@ class Gemm16(COp):
return """ return """
bcode = bin_%(name)s; bcode = bin_%(name)s;
sz = sizeof(bin_%(name)s); sz = sizeof(bin_%(name)s);
if (GpuKernel_init(&k_%(name)s, c->ops, c->ctx, 1, &bcode, &sz, if (GpuKernel_init(&k_%(name)s, c->ctx, 1, &bcode, &sz,
"hgemm_%(name)s", 13, types, GA_USE_BINARY, NULL) "hgemm_%(name)s", 13, types, GA_USE_BINARY, NULL)
!= GA_NO_ERROR) { != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Could not initialize kernel %(name)s"); PyErr_SetString(PyExc_RuntimeError, "Could not initialize kernel %(name)s");
......
...@@ -189,7 +189,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op): ...@@ -189,7 +189,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
flags=flags, objvar=k_var)] flags=flags, objvar=k_var)]
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
if node.inputs[0].type.context.kind != 'cuda': if node.inputs[0].type.context.kind != b'cuda':
raise NotImplementedError('cuda only') raise NotImplementedError('cuda only')
typecode_x = pygpu.gpuarray.dtype_to_typecode(node.inputs[0].dtype) typecode_x = pygpu.gpuarray.dtype_to_typecode(node.inputs[0].dtype)
typecode_b = pygpu.gpuarray.dtype_to_typecode(node.inputs[1].dtype) typecode_b = pygpu.gpuarray.dtype_to_typecode(node.inputs[1].dtype)
...@@ -375,7 +375,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op): ...@@ -375,7 +375,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
return ['<numpy_compat.h>', '<gpuarray/types.h>'] return ['<numpy_compat.h>', '<gpuarray/types.h>']
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
if node.inputs[0].type.context.kind != 'cuda': if node.inputs[0].type.context.kind != b'cuda':
raise NotImplementedError("cuda only") raise NotImplementedError("cuda only")
typecode_dx = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype) typecode_dx = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype)
itemsize_dnll = numpy.dtype(node.inputs[0].dtype).itemsize itemsize_dnll = numpy.dtype(node.inputs[0].dtype).itemsize
...@@ -584,7 +584,7 @@ class GpuSoftmax(GpuKernelBase, Op): ...@@ -584,7 +584,7 @@ class GpuSoftmax(GpuKernelBase, Op):
return ['<numpy_compat.h>', '<gpuarray/types.h>'] return ['<numpy_compat.h>', '<gpuarray/types.h>']
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
if node.inputs[0].type.context.kind != 'cuda': if node.inputs[0].type.context.kind != b'cuda':
raise NotImplementedError("cuda only") raise NotImplementedError("cuda only")
dtype_x = node.inputs[0].dtype dtype_x = node.inputs[0].dtype
work_x = work_dtype(dtype_x) work_x = work_dtype(dtype_x)
...@@ -783,7 +783,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op): ...@@ -783,7 +783,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
return ['<numpy_compat.h>', '<gpuarray/types.h>'] return ['<numpy_compat.h>', '<gpuarray/types.h>']
def c_code(self, node, nodename, inp, out, sub): def c_code(self, node, nodename, inp, out, sub):
if node.inputs[0].type.context.kind != 'cuda': if node.inputs[0].type.context.kind != b'cuda':
raise NotImplementedError('cuda only') raise NotImplementedError('cuda only')
dtype_x = node.inputs[0].dtype dtype_x = node.inputs[0].dtype
dtype_b = node.inputs[1].dtype dtype_b = node.inputs[1].dtype
......
...@@ -146,7 +146,7 @@ def op_lifter(OP, cuda_only=False): ...@@ -146,7 +146,7 @@ def op_lifter(OP, cuda_only=False):
# Check if we should replace # Check if we should replace
if (not replace or if (not replace or
(cuda_only and (cuda_only and
get_context(context_name).kind != 'cuda')): get_context(context_name).kind != b'cuda')):
return False return False
# tag the inputs with the context in case # tag the inputs with the context in case
...@@ -643,7 +643,7 @@ def local_gpua_advanced_subtensor(node, context_name): ...@@ -643,7 +643,7 @@ def local_gpua_advanced_subtensor(node, context_name):
def local_gpua_advanced_incsubtensor(node, context_name): def local_gpua_advanced_incsubtensor(node, context_name):
context = get_context(context_name) context = get_context(context_name)
# This is disabled on non-cuda contexts # This is disabled on non-cuda contexts
if context.kind != 'cuda': if context.kind != b'cuda':
return None return None
x, y, ilist = node.inputs x, y, ilist = node.inputs
...@@ -674,12 +674,12 @@ def local_gpua_careduce(node, context_name): ...@@ -674,12 +674,12 @@ def local_gpua_careduce(node, context_name):
if isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul, if isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul,
scalar.Maximum, scalar.Minimum)): scalar.Maximum, scalar.Minimum)):
ctx = get_context(context_name) ctx = get_context(context_name)
if ctx.kind == 'opencl': if ctx.kind == b'opencl':
op = GpuCAReduceCPY op = GpuCAReduceCPY
if node.op.scalar_op not in [scalar.add, scalar.mul]: if node.op.scalar_op not in [scalar.add, scalar.mul]:
# We don't support yet all reduction with cpy code. # We don't support yet all reduction with cpy code.
return return
elif ctx.kind == 'cuda': elif ctx.kind == b'cuda':
op = GpuCAReduceCuda op = GpuCAReduceCuda
else: else:
return False return False
......
...@@ -340,7 +340,7 @@ class GpuIncSubtensor(IncSubtensor): ...@@ -340,7 +340,7 @@ class GpuIncSubtensor(IncSubtensor):
args[1].name = "b"; args[1].name = "b";
args[1].typecode = %(type2)s; args[1].typecode = %(type2)s;
args[1].flags = GE_READ; args[1].flags = GE_READ;
iadd = GpuElemwise_new(%(ctx)s->ops, %(ctx)s->ctx, "", "a += b", iadd = GpuElemwise_new(%(ctx)s->ctx, "", "a += b",
2, args, %(nd)s, 0); 2, args, %(nd)s, 0);
if (iadd == NULL) { if (iadd == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Could not intialize inplace add support"); PyErr_SetString(PyExc_RuntimeError, "Could not intialize inplace add support");
...@@ -369,7 +369,7 @@ class GpuIncSubtensor(IncSubtensor): ...@@ -369,7 +369,7 @@ class GpuIncSubtensor(IncSubtensor):
parent_version = super(GpuIncSubtensor, self).c_code_cache_version() parent_version = super(GpuIncSubtensor, self).c_code_cache_version()
if not parent_version: if not parent_version:
return return
return parent_version + (5,) return parent_version + (6,)
class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1): class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
...@@ -437,8 +437,7 @@ if (err != GA_NO_ERROR) { ...@@ -437,8 +437,7 @@ if (err != GA_NO_ERROR) {
if (err == GA_VALUE_ERROR) { if (err == GA_VALUE_ERROR) {
PyErr_SetString(PyExc_IndexError, "Index out of bounds."); PyErr_SetString(PyExc_IndexError, "Index out of bounds.");
} else { } else {
PyErr_SetString(PyExc_RuntimeError, Gpu_error(%(v)s->context->ops, PyErr_SetString(PyExc_RuntimeError, GpuArray_error(&%(v)s->ga, err));
%(v)s->context->ctx, err));
} }
%(fail)s %(fail)s
} }
...@@ -589,7 +588,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1): ...@@ -589,7 +588,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
return super(GpuAdvancedIncSubtensor1_dev20, self).perform(node, inp, out) return super(GpuAdvancedIncSubtensor1_dev20, self).perform(node, inp, out)
def c_code_cache_version(self): def c_code_cache_version(self):
return (6,) return (8,)
def c_headers(self): def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray_helper.h>', return ['<numpy_compat.h>', '<gpuarray_helper.h>',
...@@ -600,7 +599,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1): ...@@ -600,7 +599,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
ctx = self.get_params(node) ctx = self.get_params(node)
if ctx.kind != 'cuda': if ctx.kind != b'cuda':
raise NotImplementedError("cuda only") raise NotImplementedError("cuda only")
if (self.set_instead_of_inc or if (self.set_instead_of_inc or
node.inputs[0].ndim != node.inputs[1].ndim or node.inputs[0].ndim != node.inputs[1].ndim or
...@@ -757,8 +756,8 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) { ...@@ -757,8 +756,8 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
int err, kerr = 0; int err, kerr = 0;
if (threads_per_block[0] > 0 && n_blocks[0] > 0) { if (threads_per_block[0] > 0 && n_blocks[0] > 0) {
err = py_self->ga.ops->property(NULL, py_self->ga.data, NULL, err = gpudata_property(py_self->ga.data,
GA_CTX_PROP_ERRBUF, &errbuf); GA_CTX_PROP_ERRBUF, &errbuf);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't fetch error buffer"); PyErr_SetString(PyExc_RuntimeError, "Can't fetch error buffer");
return 1; return 1;
...@@ -793,7 +792,7 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) { ...@@ -793,7 +792,7 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
GpuKernel_error(&%(k_var)s, err)); GpuKernel_error(&%(k_var)s, err));
return 1; return 1;
} }
err = py_self->ga.ops->buffer_read(&kerr, errbuf, 0, sizeof(int)); err = gpudata_read(&kerr, errbuf, 0, sizeof(int));
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't read error buffer"); PyErr_SetString(PyExc_RuntimeError, "Can't read error buffer");
return 1; return 1;
...@@ -801,7 +800,7 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) { ...@@ -801,7 +800,7 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
if (kerr != 0) { if (kerr != 0) {
PyErr_SetString(PyExc_IndexError, "Index out of bounds"); PyErr_SetString(PyExc_IndexError, "Index out of bounds");
kerr = 0; kerr = 0;
py_self->ga.ops->buffer_write(errbuf, 0, &kerr, sizeof(int)); gpudata_write(errbuf, 0, &kerr, sizeof(int));
return 1; return 1;
} }
} }
......
...@@ -197,7 +197,7 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY): ...@@ -197,7 +197,7 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
def setUp(self): def setUp(self):
super(test_GpuCAReduceCuda, self).setUp() super(test_GpuCAReduceCuda, self).setUp()
if get_context(test_ctx_name).kind != 'cuda': if get_context(test_ctx_name).kind != b'cuda':
raise SkipTest("Cuda specific tests") raise SkipTest("Cuda specific tests")
...@@ -212,7 +212,7 @@ class T_gpureduce_dtype(test_elemwise.T_reduce_dtype): ...@@ -212,7 +212,7 @@ class T_gpureduce_dtype(test_elemwise.T_reduce_dtype):
'float32', 'float64'] 'float32', 'float64']
def setUp(self): def setUp(self):
if get_context(test_ctx_name).kind != 'cuda': if get_context(test_ctx_name).kind != b'cuda':
raise SkipTest("Cuda specific tests") raise SkipTest("Cuda specific tests")
......
...@@ -24,7 +24,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -24,7 +24,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
def setUp(self): def setUp(self):
super(TestGpuCumsum, self).setUp() super(TestGpuCumsum, self).setUp()
test_ctx = get_context(test_ctx_name) test_ctx = get_context(test_ctx_name)
if test_ctx.kind != 'cuda': if test_ctx.kind != b'cuda':
raise SkipTest("Cuda specific tests") raise SkipTest("Cuda specific tests")
self.max_threads_dim0 = test_ctx.maxlsize0 self.max_threads_dim0 = test_ctx.maxlsize0
self.max_grid_size1 = test_ctx.maxgsize2 self.max_grid_size1 = test_ctx.maxgsize2
......
...@@ -125,7 +125,7 @@ def test_reduce(): ...@@ -125,7 +125,7 @@ def test_reduce():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
ops = [type(node.op) for node in topo] ops = [type(node.op) for node in topo]
if kind == 'opencl' and method in ["max", "min"]: if kind == b'opencl' and method in ["max", "min"]:
assert not(GpuCAReduceCuda in ops or GpuCAReduceCPY in ops) assert not(GpuCAReduceCuda in ops or GpuCAReduceCPY in ops)
else: else:
assert GpuCAReduceCuda in ops or GpuCAReduceCPY in ops assert GpuCAReduceCuda in ops or GpuCAReduceCPY in ops
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论