提交 b895c6e8 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #4542 from abergeron/api_changes

libgpuarray api changes
......@@ -42,7 +42,7 @@ register_transfer(transfer)
def init_dev(dev, name=None):
v = pygpu.gpuarray.api_version()
expected = -9998
expected = -9997
if v[0] != expected:
raise RuntimeError("Wrong major API version for gpuarray:", v[0],
"Make sure Theano and libgpuarray/pygpu "
......
......@@ -259,14 +259,14 @@ class GpuKernelBase(object):
int types[%(numargs)u] = {%(types)s};
const char *bcode = %(bvar)s;
size_t sz = sizeof(%(bvar)s);
if (GpuKernel_init(&%(ovar)s, %(ctx)s->ops, %(ctx)s->ctx, 1, &bcode, &sz,
if (GpuKernel_init(&%(ovar)s, %(ctx)s->ctx, 1, &bcode, &sz,
"%(kname)s", %(numargs)u, types, GA_USE_BINARY, NULL)
!= GA_NO_ERROR) {
if ((err = GpuKernel_init(&%(ovar)s, %(ctx)s->ops, %(ctx)s->ctx, 1,
if ((err = GpuKernel_init(&%(ovar)s, %(ctx)s->ctx, 1,
&%(cname)s, NULL, "%(kname)s", %(numargs)u,
types, %(flags)s, NULL)) != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuKernel_init error %%d: %%s",
err, Gpu_error(%(ctx)s->ops, %(ctx)s->ctx, err));
err, gpucontext_error(%(ctx)s->ctx, err));
%(fail)s
}
}
......@@ -310,7 +310,7 @@ class GpuKernelBase(object):
The node that we need the cache version for.
"""
return (3, self.get_params(node).bin_id)
return (4, self.get_params(node).bin_id)
class HostFromGpu(Op):
......
......@@ -24,16 +24,9 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W,
size_t *offW = NULL;
size_t *offInp = NULL;
size_t *offOut = NULL;
gpuarray_blas_ops *blas_ops;
int err;
err = ctx->ops->property(ctx->ctx, NULL, NULL,
GA_CTX_PROP_BLAS_OPS, &blas_ops);
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't get blas ops");
return -1;
}
err = blas_ops->setup(ctx->ctx);
err = gpublas_setup(ctx->ctx);
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't setup blas");
return -1;
......@@ -93,29 +86,29 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W,
}
if (out->ga.typecode == GA_FLOAT) {
err = blas_ops->sgemvBatch(cb_fortran, transA,
PyGpuArray_DIMS(out)[2],
PyGpuArray_DIMS(h)[2], 1,
W_list, offW, lda,
inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode),
1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode),
PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0);
err = gpublas_sgemvBatch(cb_fortran, transA,
PyGpuArray_DIMS(out)[2],
PyGpuArray_DIMS(h)[2], 1,
W_list, offW, lda,
inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode),
1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode),
PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0);
} else if (out->ga.typecode == GA_DOUBLE) {
err = blas_ops->dgemvBatch(cb_fortran, transA,
PyGpuArray_DIMS(out)[2],
PyGpuArray_DIMS(h)[2], 1,
W_list, offW, lda,
inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode),
1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode),
PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0);
err = gpublas_dgemvBatch(cb_fortran, transA,
PyGpuArray_DIMS(out)[2],
PyGpuArray_DIMS(h)[2], 1,
W_list, offW, lda,
inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode),
1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode),
PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0);
} else if (out->ga.typecode == GA_HALF) {
err = blas_ops->sgemvBatch(cb_fortran, transA,
PyGpuArray_DIMS(out)[2],
PyGpuArray_DIMS(h)[2], 1,
W_list, offW, lda,
inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode),
1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode),
PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0);
err = gpublas_sgemvBatch(cb_fortran, transA,
PyGpuArray_DIMS(out)[2],
PyGpuArray_DIMS(h)[2], 1,
W_list, offW, lda,
inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode),
1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode),
PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0);
} else {
err = GA_INVALID_ERROR;
}
......
......@@ -12,16 +12,9 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x,
size_t *offOut = NULL;
size_t *offX = NULL;
size_t *offY = NULL;
gpuarray_blas_ops *blas_ops;
int err;
err = ctx->ops->property(ctx->ctx, NULL, NULL,
GA_CTX_PROP_BLAS_OPS, &blas_ops);
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't get blas ops");
return -1;
}
err = blas_ops->setup(ctx->ctx);
err = gpublas_setup(ctx->ctx);
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't setup blas");
return -1;
......@@ -84,26 +77,26 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x,
ssize_t str_out = PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode);
if (out->ga.typecode == GA_FLOAT) {
err = blas_ops->sgerBatch(cb_fortran,
PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2],
*(float *)PyArray_GETPTR1(alpha, 0),
y_list, offY, str_y, x_list, offX, str_x,
o_list, offOut, str_out,
PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0);
err = gpublas_sgerBatch(cb_fortran,
PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2],
*(float *)PyArray_GETPTR1(alpha, 0),
y_list, offY, str_y, x_list, offX, str_x,
o_list, offOut, str_out,
PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0);
} else if (out->ga.typecode == GA_DOUBLE) {
err = blas_ops->dgerBatch(cb_fortran,
PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2],
*(double *)PyArray_GETPTR1(alpha, 0),
y_list, offY, str_y, x_list, offX, str_x,
o_list, offOut, str_out,
PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0);
err = gpublas_dgerBatch(cb_fortran,
PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2],
*(double *)PyArray_GETPTR1(alpha, 0),
y_list, offY, str_y, x_list, offX, str_x,
o_list, offOut, str_out,
PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0);
} else if (out->ga.typecode == GA_HALF) {
err = blas_ops->hgerBatch(cb_fortran,
PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2],
*(float *)PyArray_GETPTR1(alpha, 0),
y_list, offY, str_y, x_list, offX, str_x,
o_list, offOut, str_out,
PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0);
err = gpublas_hgerBatch(cb_fortran,
PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2],
*(float *)PyArray_GETPTR1(alpha, 0),
y_list, offY, str_y, x_list, offX, str_x,
o_list, offOut, str_out,
PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0);
} else {
err = GA_INVALID_ERROR;
}
......
......@@ -125,7 +125,7 @@ def dnn_available(context_name):
ctx = get_context(context_name)
if not ctx.kind == 'cuda':
if not ctx.kind == b'cuda':
dnn_available.msg = "Not on a CUDA device."
return False
......
......@@ -105,7 +105,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
algo = choice.algo;
#else
size_t free;
int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free);
int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
......@@ -234,7 +234,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
* to place a nice get_work_mem() function in.
*/
if (worksize != 0) {
workspace = c->ops->buffer_alloc(c->ctx, worksize, NULL, 0, NULL);
workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"Could not allocate working memory");
......@@ -258,7 +258,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output));
if (worksize != 0)
c->ops->buffer_release(workspace);
gpudata_release(workspace);
cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
......
......@@ -106,7 +106,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
algo = choice.algo;
#else
size_t free;
int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free);
int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
......@@ -204,7 +204,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
}
if (worksize != 0) {
workspace = c->ops->buffer_alloc(c->ctx, worksize, NULL, 0, NULL);
workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"Could not allocate working memory");
......@@ -227,7 +227,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(*input));
if (worksize != 0)
c->ops->buffer_release(workspace);
gpudata_release(workspace);
cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_record(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
......
......@@ -107,7 +107,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
algo = choice.algo;
#else
size_t free;
int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free);
int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
......@@ -192,7 +192,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
}
if (worksize != 0) {
workspace = c->ops->buffer_alloc(c->ctx, worksize, NULL, 0, NULL);
workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Could not allocate working memory");
cuda_exit(c->ctx);
......@@ -214,7 +214,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(*kerns));
if (worksize != 0)
c->ops->buffer_release(workspace);
gpudata_release(workspace);
cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_record(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
......
......@@ -199,7 +199,7 @@ class GpuElemwise(HideC, Elemwise):
typecode=o.type.typecode)
res += """
ge = GpuElemwise_new(%(ctx)s->ops, %(ctx)s->ctx, %(support)s, %(kop)s, %(nargs)s, args, %(nd)s, 0);
ge = GpuElemwise_new(%(ctx)s->ctx, %(support)s, %(kop)s, %(nargs)s, args, %(nd)s, 0);
if (ge == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Could not initialize elemwise support");
%(fail)s
......@@ -360,7 +360,7 @@ class GpuElemwise(HideC, Elemwise):
def c_code_cache_version(self):
ver = self.scalar_op.c_code_cache_version()
if ver:
return (6, ver)
return (7, ver)
else:
return ver
......@@ -554,7 +554,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
def make_node(self, x):
x = as_gpuarray_variable(x, infer_context_name(x))
if x.type.context.kind != 'cuda':
if x.type.context.kind != b'cuda':
raise TypeError("GpuCAReduceCuda doesn't work for non-cuda devices")
ret = super(GpuCAReduceCuda, self).make_node(x)
self = copy.copy(self)
......
......@@ -26,11 +26,8 @@ class GpuCumsum(GpuKernelBase, Op):
def __init__(self, axis):
self.axis = axis
def __str__(self):
return "%s{%s}" % (self.__class__.__name__, self.axis)
def c_code_cache_version_apply(self, node):
return (1,)
def c_code_cache_version(self):
return (3,)
def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray/types.h>', '<gpuarray_helper.h>']
......@@ -221,7 +218,7 @@ class GpuCumsum(GpuKernelBase, Op):
return kernels
def c_code(self, node, nodename, inp, out, sub):
if node.inputs[0].type.context.kind != 'cuda':
if node.inputs[0].type.context.kind != b'cuda':
raise NotImplementedError("cuda only")
x, = inp
z, = out
......@@ -249,17 +246,17 @@ class GpuCumsum(GpuKernelBase, Op):
size_t max_grid_size1;
size_t max_grid_size2;
int err;
err = %(ctx)s->ops->property(%(ctx)s->ctx, NULL, NULL, GA_CTX_PROP_MAXLSIZE0, &max_threads_dim0);
err = gpucontext_property(%(ctx)s->ctx, GA_CTX_PROP_MAXLSIZE0, &max_threads_dim0);
if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_threads_dims0");
%(fail)s;
}
err = %(ctx)s->ops->property(%(ctx)s->ctx, NULL, NULL, GA_CTX_PROP_MAXGSIZE1, &max_grid_size1);
err = gpucontext_property(%(ctx)s->ctx, GA_CTX_PROP_MAXGSIZE1, &max_grid_size1);
if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size1");
%(fail)s;
}
err = %(ctx)s->ops->property(%(ctx)s->ctx, NULL, NULL, GA_CTX_PROP_MAXGSIZE2, &max_grid_size2);
err = gpucontext_property(%(ctx)s->ctx, GA_CTX_PROP_MAXGSIZE2, &max_grid_size2);
if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "Could not fetch max_grid_size2");
%(fail)s;
......
......@@ -117,7 +117,7 @@ int gemm16(PyGpuArrayObject *C, float alpha,
if (48 < n128 && n128 <= 64) {
n64 = n / 64;
if (nprocs == 0)
if (A->ga.ops->property(A->context->ctx, NULL, NULL,
if (gpucontext_property(A->context->ctx,
GA_CTX_PROP_NUMPROCS, &nprocs)) {
nprocs = 0;
res = 1;
......
......@@ -243,7 +243,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
return kernels
def c_code(self, node, name, inp, out, sub):
if node.inputs[0].type.context.kind != 'cuda':
if node.inputs[0].type.context.kind != b'cuda':
raise NotImplementedError("cuda only")
dtype_ten4 = node.inputs[0].dtype
dtype_neib_shape = node.inputs[1].dtype
......
......@@ -105,7 +105,7 @@ class Gemm16(COp):
return """
bcode = bin_%(name)s;
sz = sizeof(bin_%(name)s);
if (GpuKernel_init(&k_%(name)s, c->ops, c->ctx, 1, &bcode, &sz,
if (GpuKernel_init(&k_%(name)s, c->ctx, 1, &bcode, &sz,
"hgemm_%(name)s", 13, types, GA_USE_BINARY, NULL)
!= GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Could not initialize kernel %(name)s");
......
......@@ -189,7 +189,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
flags=flags, objvar=k_var)]
def c_code(self, node, nodename, inp, out, sub):
if node.inputs[0].type.context.kind != 'cuda':
if node.inputs[0].type.context.kind != b'cuda':
raise NotImplementedError('cuda only')
typecode_x = pygpu.gpuarray.dtype_to_typecode(node.inputs[0].dtype)
typecode_b = pygpu.gpuarray.dtype_to_typecode(node.inputs[1].dtype)
......@@ -375,7 +375,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
return ['<numpy_compat.h>', '<gpuarray/types.h>']
def c_code(self, node, nodename, inp, out, sub):
if node.inputs[0].type.context.kind != 'cuda':
if node.inputs[0].type.context.kind != b'cuda':
raise NotImplementedError("cuda only")
typecode_dx = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype)
itemsize_dnll = numpy.dtype(node.inputs[0].dtype).itemsize
......@@ -584,7 +584,7 @@ class GpuSoftmax(GpuKernelBase, Op):
return ['<numpy_compat.h>', '<gpuarray/types.h>']
def c_code(self, node, nodename, inp, out, sub):
if node.inputs[0].type.context.kind != 'cuda':
if node.inputs[0].type.context.kind != b'cuda':
raise NotImplementedError("cuda only")
dtype_x = node.inputs[0].dtype
work_x = work_dtype(dtype_x)
......@@ -783,7 +783,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
return ['<numpy_compat.h>', '<gpuarray/types.h>']
def c_code(self, node, nodename, inp, out, sub):
if node.inputs[0].type.context.kind != 'cuda':
if node.inputs[0].type.context.kind != b'cuda':
raise NotImplementedError('cuda only')
dtype_x = node.inputs[0].dtype
dtype_b = node.inputs[1].dtype
......
......@@ -146,7 +146,7 @@ def op_lifter(OP, cuda_only=False):
# Check if we should replace
if (not replace or
(cuda_only and
get_context(context_name).kind != 'cuda')):
get_context(context_name).kind != b'cuda')):
return False
# tag the inputs with the context in case
......@@ -643,7 +643,7 @@ def local_gpua_advanced_subtensor(node, context_name):
def local_gpua_advanced_incsubtensor(node, context_name):
context = get_context(context_name)
# This is disabled on non-cuda contexts
if context.kind != 'cuda':
if context.kind != b'cuda':
return None
x, y, ilist = node.inputs
......@@ -674,12 +674,12 @@ def local_gpua_careduce(node, context_name):
if isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul,
scalar.Maximum, scalar.Minimum)):
ctx = get_context(context_name)
if ctx.kind == 'opencl':
if ctx.kind == b'opencl':
op = GpuCAReduceCPY
if node.op.scalar_op not in [scalar.add, scalar.mul]:
# We don't support yet all reduction with cpy code.
return
elif ctx.kind == 'cuda':
elif ctx.kind == b'cuda':
op = GpuCAReduceCuda
else:
return False
......
......@@ -340,7 +340,7 @@ class GpuIncSubtensor(IncSubtensor):
args[1].name = "b";
args[1].typecode = %(type2)s;
args[1].flags = GE_READ;
iadd = GpuElemwise_new(%(ctx)s->ops, %(ctx)s->ctx, "", "a += b",
iadd = GpuElemwise_new(%(ctx)s->ctx, "", "a += b",
2, args, %(nd)s, 0);
if (iadd == NULL) {
PyErr_SetString(PyExc_RuntimeError, "Could not intialize inplace add support");
......@@ -369,7 +369,7 @@ class GpuIncSubtensor(IncSubtensor):
parent_version = super(GpuIncSubtensor, self).c_code_cache_version()
if not parent_version:
return
return parent_version + (5,)
return parent_version + (6,)
class GpuAdvancedSubtensor1(HideC, tensor.AdvancedSubtensor1):
......@@ -437,8 +437,7 @@ if (err != GA_NO_ERROR) {
if (err == GA_VALUE_ERROR) {
PyErr_SetString(PyExc_IndexError, "Index out of bounds.");
} else {
PyErr_SetString(PyExc_RuntimeError, Gpu_error(%(v)s->context->ops,
%(v)s->context->ctx, err));
PyErr_SetString(PyExc_RuntimeError, GpuArray_error(&%(v)s->ga, err));
}
%(fail)s
}
......@@ -589,7 +588,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
return super(GpuAdvancedIncSubtensor1_dev20, self).perform(node, inp, out)
def c_code_cache_version(self):
return (6,)
return (8,)
def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray_helper.h>',
......@@ -600,7 +599,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
def c_code(self, node, name, inputs, outputs, sub):
ctx = self.get_params(node)
if ctx.kind != 'cuda':
if ctx.kind != b'cuda':
raise NotImplementedError("cuda only")
if (self.set_instead_of_inc or
node.inputs[0].ndim != node.inputs[1].ndim or
......@@ -757,8 +756,8 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
int err, kerr = 0;
if (threads_per_block[0] > 0 && n_blocks[0] > 0) {
err = py_self->ga.ops->property(NULL, py_self->ga.data, NULL,
GA_CTX_PROP_ERRBUF, &errbuf);
err = gpudata_property(py_self->ga.data,
GA_CTX_PROP_ERRBUF, &errbuf);
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't fetch error buffer");
return 1;
......@@ -793,7 +792,7 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
GpuKernel_error(&%(k_var)s, err));
return 1;
}
err = py_self->ga.ops->buffer_read(&kerr, errbuf, 0, sizeof(int));
err = gpudata_read(&kerr, errbuf, 0, sizeof(int));
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't read error buffer");
return 1;
......@@ -801,7 +800,7 @@ __device__ ga_half atomicAdd(ga_half *addr, ga_half val) {
if (kerr != 0) {
PyErr_SetString(PyExc_IndexError, "Index out of bounds");
kerr = 0;
py_self->ga.ops->buffer_write(errbuf, 0, &kerr, sizeof(int));
gpudata_write(errbuf, 0, &kerr, sizeof(int));
return 1;
}
}
......
......@@ -197,7 +197,7 @@ class test_GpuCAReduceCuda(test_GpuCAReduceCPY):
def setUp(self):
super(test_GpuCAReduceCuda, self).setUp()
if get_context(test_ctx_name).kind != 'cuda':
if get_context(test_ctx_name).kind != b'cuda':
raise SkipTest("Cuda specific tests")
......@@ -212,7 +212,7 @@ class T_gpureduce_dtype(test_elemwise.T_reduce_dtype):
'float32', 'float64']
def setUp(self):
if get_context(test_ctx_name).kind != 'cuda':
if get_context(test_ctx_name).kind != b'cuda':
raise SkipTest("Cuda specific tests")
......
......@@ -24,7 +24,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
def setUp(self):
super(TestGpuCumsum, self).setUp()
test_ctx = get_context(test_ctx_name)
if test_ctx.kind != 'cuda':
if test_ctx.kind != b'cuda':
raise SkipTest("Cuda specific tests")
self.max_threads_dim0 = test_ctx.maxlsize0
self.max_grid_size1 = test_ctx.maxgsize2
......
......@@ -125,7 +125,7 @@ def test_reduce():
topo = f.maker.fgraph.toposort()
ops = [type(node.op) for node in topo]
if kind == 'opencl' and method in ["max", "min"]:
if kind == b'opencl' and method in ["max", "min"]:
assert not(GpuCAReduceCuda in ops or GpuCAReduceCPY in ops)
else:
assert GpuCAReduceCuda in ops or GpuCAReduceCPY in ops
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论