提交 c59491c7 作者: Arnaud Bergeron

Fixes for blas api changes.

上级 83e1f687
...@@ -24,15 +24,9 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W, ...@@ -24,15 +24,9 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W,
size_t *offW = NULL; size_t *offW = NULL;
size_t *offInp = NULL; size_t *offInp = NULL;
size_t *offOut = NULL; size_t *offOut = NULL;
gpuarray_blas_ops *blas_ops;
int err; int err;
err = gpucontext_property(ctx->ctx, GA_CTX_PROP_BLAS_OPS, &blas_ops); err = gpublas_setup(ctx->ctx);
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't get blas ops");
return -1;
}
err = blas_ops->setup(ctx->ctx);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't setup blas"); PyErr_SetString(PyExc_RuntimeError, "Can't setup blas");
return -1; return -1;
...@@ -92,29 +86,29 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W, ...@@ -92,29 +86,29 @@ int APPLY_SPECIFIC(blockgemv)(PyGpuArrayObject *o, PyGpuArrayObject *W,
} }
if (out->ga.typecode == GA_FLOAT) { if (out->ga.typecode == GA_FLOAT) {
err = blas_ops->sgemvBatch(cb_fortran, transA, err = gpublas_sgemvBatch(cb_fortran, transA,
PyGpuArray_DIMS(out)[2], PyGpuArray_DIMS(out)[2],
PyGpuArray_DIMS(h)[2], 1, PyGpuArray_DIMS(h)[2], 1,
W_list, offW, lda, W_list, offW, lda,
inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode), inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode),
1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode), 1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode),
PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0); PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0);
} else if (out->ga.typecode == GA_DOUBLE) { } else if (out->ga.typecode == GA_DOUBLE) {
err = blas_ops->dgemvBatch(cb_fortran, transA, err = gpublas_dgemvBatch(cb_fortran, transA,
PyGpuArray_DIMS(out)[2], PyGpuArray_DIMS(out)[2],
PyGpuArray_DIMS(h)[2], 1, PyGpuArray_DIMS(h)[2], 1,
W_list, offW, lda, W_list, offW, lda,
inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode), inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode),
1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode), 1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode),
PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0); PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0);
} else if (out->ga.typecode == GA_HALF) { } else if (out->ga.typecode == GA_HALF) {
err = blas_ops->sgemvBatch(cb_fortran, transA, err = gpublas_sgemvBatch(cb_fortran, transA,
PyGpuArray_DIMS(out)[2], PyGpuArray_DIMS(out)[2],
PyGpuArray_DIMS(h)[2], 1, PyGpuArray_DIMS(h)[2], 1,
W_list, offW, lda, W_list, offW, lda,
inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode), inp_list, offInp, PyGpuArray_STRIDES(h)[2] / gpuarray_get_elsize(h->ga.typecode),
1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode), 1, out_list, offOut, PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode),
PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0); PyGpuArray_DIMS(out)[1] * PyGpuArray_DIMS(h)[1] * PyGpuArray_DIMS(out)[0], 0);
} else { } else {
err = GA_INVALID_ERROR; err = GA_INVALID_ERROR;
} }
......
...@@ -12,15 +12,9 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x, ...@@ -12,15 +12,9 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x,
size_t *offOut = NULL; size_t *offOut = NULL;
size_t *offX = NULL; size_t *offX = NULL;
size_t *offY = NULL; size_t *offY = NULL;
gpuarray_blas_ops *blas_ops;
int err; int err;
err = gpucontext_property(ctx->ctx, GA_CTX_PROP_BLAS_OPS, &blas_ops); err = gpublas_setup(ctx->ctx);
if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't get blas ops");
return -1;
}
err = blas_ops->setup(ctx->ctx);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Can't setup blas"); PyErr_SetString(PyExc_RuntimeError, "Can't setup blas");
return -1; return -1;
...@@ -83,26 +77,26 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x, ...@@ -83,26 +77,26 @@ int APPLY_SPECIFIC(blockger)(PyGpuArrayObject *o, PyGpuArrayObject *x,
ssize_t str_out = PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode); ssize_t str_out = PyGpuArray_STRIDES(out)[2] / gpuarray_get_elsize(out->ga.typecode);
if (out->ga.typecode == GA_FLOAT) { if (out->ga.typecode == GA_FLOAT) {
err = blas_ops->sgerBatch(cb_fortran, err = gpublas_sgerBatch(cb_fortran,
PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2], PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2],
*(float *)PyArray_GETPTR1(alpha, 0), *(float *)PyArray_GETPTR1(alpha, 0),
y_list, offY, str_y, x_list, offX, str_x, y_list, offY, str_y, x_list, offX, str_x,
o_list, offOut, str_out, o_list, offOut, str_out,
PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0); PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0);
} else if (out->ga.typecode == GA_DOUBLE) { } else if (out->ga.typecode == GA_DOUBLE) {
err = blas_ops->dgerBatch(cb_fortran, err = gpublas_dgerBatch(cb_fortran,
PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2], PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2],
*(double *)PyArray_GETPTR1(alpha, 0), *(double *)PyArray_GETPTR1(alpha, 0),
y_list, offY, str_y, x_list, offX, str_x, y_list, offY, str_y, x_list, offX, str_x,
o_list, offOut, str_out, o_list, offOut, str_out,
PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0); PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0);
} else if (out->ga.typecode == GA_HALF) { } else if (out->ga.typecode == GA_HALF) {
err = blas_ops->hgerBatch(cb_fortran, err = gpublas_hgerBatch(cb_fortran,
PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2], PyGpuArray_DIMS(y)[2], PyGpuArray_DIMS(x)[2],
*(float *)PyArray_GETPTR1(alpha, 0), *(float *)PyArray_GETPTR1(alpha, 0),
y_list, offY, str_y, x_list, offX, str_x, y_list, offY, str_y, x_list, offX, str_x,
o_list, offOut, str_out, o_list, offOut, str_out,
PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0); PyGpuArray_DIMS(x)[0] * PyGpuArray_DIMS(x)[1] * PyGpuArray_DIMS(y)[1], 0);
} else { } else {
err = GA_INVALID_ERROR; err = GA_INVALID_ERROR;
} }
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论