提交 7c22fa2e authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5403 from abergeron/switch

Switch gs and ls to follow libgpuarray.
...@@ -53,7 +53,7 @@ PLATFORMS = ["Windows", "Linux", "Solaris", "Mac OS-X", "Unix"] ...@@ -53,7 +53,7 @@ PLATFORMS = ["Windows", "Linux", "Solaris", "Mac OS-X", "Unix"]
MAJOR = 0 MAJOR = 0
MINOR = 9 MINOR = 9
MICRO = 0 MICRO = 0
SUFFIX = "dev4" # Should be blank except for rc's, betas, etc. SUFFIX = "dev5" # Should be blank except for rc's, betas, etc.
ISRELEASED = False ISRELEASED = False
VERSION = '%d.%d.%d%s' % (MAJOR, MINOR, MICRO, SUFFIX) VERSION = '%d.%d.%d%s' % (MAJOR, MINOR, MICRO, SUFFIX)
......
...@@ -48,10 +48,10 @@ def init_dev(dev, name=None): ...@@ -48,10 +48,10 @@ def init_dev(dev, name=None):
if (pygpu.version.major, pygpu.version.minor) < (0, 6): if (pygpu.version.major, pygpu.version.minor) < (0, 6):
raise ValueError( raise ValueError(
"Your installed version of pygpu is too old, please upgrade to 0.6 or later") "Your installed version of pygpu is too old, please upgrade to 0.6 or later")
# This is for the C headers API # This is for the C headers API, we need to match the exact version.
if pygpu.gpuarray.api_version()[0] < 0: if pygpu.gpuarray.api_version()[0] != 1:
raise ValueError( raise ValueError(
"Your installed libgpuarray is too old, please update") "Your installed libgpuarray is not in sync, please make sure to have the appropriate version")
if dev not in init_dev.devmap: if dev not in init_dev.devmap:
context = pygpu.init( context = pygpu.init(
dev, dev,
......
...@@ -349,23 +349,23 @@ int {fname}(unsigned int _nd, size_t *_gdim, size_t *_ldim, size_t _shared, ...@@ -349,23 +349,23 @@ int {fname}(unsigned int _nd, size_t *_gdim, size_t *_ldim, size_t _shared,
{args}) {{ {args}) {{
{setargs} {setargs}
return GpuKernel_call(&{kname}, _nd, _ldim, _gdim, _shared, NULL); return GpuKernel_call(&{kname}, _nd, _gdim, _ldim, _shared, NULL);
}} }}
int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{ int {sname}(unsigned int _nd, size_t *_n, size_t _shared, {args}) {{
size_t _ls = 0;
size_t _gs = 0; size_t _gs = 0;
size_t _ls = 0;
int _err; int _err;
if (_nd != 1) return GA_UNSUPPORTED_ERROR; if (_nd != 1) return GA_UNSUPPORTED_ERROR;
_err = GpuKernel_sched(&{kname}, _n[0], &_ls, &_gs); _err = GpuKernel_sched(&{kname}, _n[0], &_gs, &_ls);
if (_err != GA_NO_ERROR) if (_err != GA_NO_ERROR)
return _err; return _err;
{setargs} {setargs}
return GpuKernel_call(&{kname}, 1, &_ls, &_gs, _shared, NULL); return GpuKernel_call(&{kname}, 1, &_gs, &_ls, _shared, NULL);
}} }}
""".format(args=args, fname=k.fname, setargs=setargs, sname=k.sname, """.format(args=args, fname=k.fname, setargs=setargs, sname=k.sname,
kname=k.objvar) kname=k.objvar)
......
...@@ -786,7 +786,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -786,7 +786,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
(void *)%(z)s->ga.data, (void *)%(z)s->ga.data,
(void *)&%(z)s->ga.offset, (void *)&%(z)s->ga.offset,
(void *)&stride_Z0}; (void *)&stride_Z0};
int err = GpuKernel_call(&%(k_var)s, 3, n_threads, n_blocks, n_shared, kernel_params); int err = GpuKernel_call(&%(k_var)s, 3, n_blocks, n_threads, n_shared, kernel_params);
%(err_check)s %(err_check)s
""" """
in_dtype = "npy_" + node.inputs[0].dtype in_dtype = "npy_" + node.inputs[0].dtype
...@@ -852,7 +852,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -852,7 +852,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
n_blocks[0],n_blocks[1],n_blocks[2], n_blocks[0],n_blocks[1],n_blocks[2],
n_blocks[0]*n_blocks[1]*n_blocks[2], n_blocks[0]*n_blocks[1]*n_blocks[2],
n_shared, %(shapes_data)s); n_shared, %(shapes_data)s);
int err = GpuKernel_call(&%(k_var)s, 3, n_threads, n_blocks, n_shared, kernel_params); int err = GpuKernel_call(&%(k_var)s, 3, n_blocks, n_threads, n_shared, kernel_params);
%(err_check)s %(err_check)s
""" % locals(), file=sio) """ % locals(), file=sio)
...@@ -1254,7 +1254,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1254,7 +1254,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
n_threads, numEls, n_threads, numEls,
PyGpuArray_NDIM(%(x)s)); PyGpuArray_NDIM(%(x)s));
size_t n_shared = sizeof(%(acc_dtype)s) * n_threads; size_t n_shared = sizeof(%(acc_dtype)s) * n_threads;
int err = GpuKernel_call(&%(k_var)s, 1, &n_threads, &n_blocks, n_shared, kernel_params); int err = GpuKernel_call(&%(k_var)s, 1, &n_blocks, &n_threads, n_shared, kernel_params);
%(err_check)s %(err_check)s
%(sync)s %(sync)s
} }
...@@ -1424,7 +1424,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1424,7 +1424,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
(void *)%(z)s->ga.data, (void *)%(z)s->ga.data,
(void *)&%(z)s->ga.offset, (void *)&%(z)s->ga.offset,
(void *)&stride_Z0, (void *)&stride_Z1}; (void *)&stride_Z0, (void *)&stride_Z1};
int err = GpuKernel_call(%(k_var)s, 3, n_threads, n_blocks, 0, kernel_params); int err = GpuKernel_call(%(k_var)s, 3, n_blocks, n_threads, 0, kernel_params);
%(err_check)s %(err_check)s
%(sync)s %(sync)s
}else{ }else{
...@@ -1453,7 +1453,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1453,7 +1453,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
(void *)&stride_A0, (void *)&stride_A1, (void *)&stride_A2, (void *)&stride_A0, (void *)&stride_A1, (void *)&stride_A2,
(void *)%(z)s->ga.data, (void *)&%(z)s->ga.offset, (void *)%(z)s->ga.data, (void *)&%(z)s->ga.offset,
(void *)&stride_Z0, (void *)&stride_Z1}; (void *)&stride_Z0, (void *)&stride_Z1};
int err = GpuKernel_call(%(k_var)s, 3, n_threads, n_blocks, n_shared, kernel_params); int err = GpuKernel_call(%(k_var)s, 3, n_blocks, n_threads, n_shared, kernel_params);
%(err_check)s %(err_check)s
%(sync)s %(sync)s
} }
...@@ -1528,7 +1528,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1528,7 +1528,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
(void *)%(z)s->ga.data, (void *)%(z)s->ga.data,
(void *)&%(z)s->ga.offset, (void *)&%(z)s->ga.offset,
(void *)&stride_Z0, (void *)&stride_Z1}; (void *)&stride_Z0, (void *)&stride_Z1};
int err = GpuKernel_call(&%(k_var)s, 3, n_threads, n_blocks, 0, kernel_params); int err = GpuKernel_call(&%(k_var)s, 3, n_blocks, n_threads, 0, kernel_params);
%(err_check)s %(err_check)s
%(sync)s %(sync)s
} }
...@@ -1662,7 +1662,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1662,7 +1662,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
(void *)%(z)s->ga.data, (void *)%(z)s->ga.data,
(void *)&%(z)s->ga.offset, (void *)&%(z)s->ga.offset,
(void *)&stride_Z0, (void *)&stride_Z1}; (void *)&stride_Z0, (void *)&stride_Z1};
int err = GpuKernel_call(&%(k_var)s, 3, n_threads, n_blocks, 0, kernel_params); int err = GpuKernel_call(&%(k_var)s, 3, n_blocks, n_thread, 0, kernel_params);
%(err_check)s %(err_check)s
%(sync)s %(sync)s
} }
...@@ -2849,7 +2849,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2849,7 +2849,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
if (gs == 0) gs = 1; if (gs == 0) gs = 1;
n /= gs; n /= gs;
ls = %(ls)s; ls = %(ls)s;
err = GpuKernel_call(&%(k_var)s, 1, &ls, &gs, 0, args); err = GpuKernel_call(&%(k_var)s, 1, &gs, &ls, 0, args);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"gpuarray error: GpuCAReduceCPY: %%s.", "gpuarray error: GpuCAReduceCPY: %%s.",
......
...@@ -396,7 +396,7 @@ class GpuCumOp(GpuKernelBase, Op): ...@@ -396,7 +396,7 @@ class GpuCumOp(GpuKernelBase, Op):
(void*) &offsetZ, (void*) &offsetZ,
(void*) deviceBlockSum->ga.data (void*) deviceBlockSum->ga.data
}; };
int err = GpuKernel_call(&k_blockCumOp_%(nodename)s, 3, dimBlock, dimGrid, sharedBytes, kernel_params); int err = GpuKernel_call(&k_blockCumOp_%(nodename)s, 3, dimGrid, dimBlock, sharedBytes, kernel_params);
if (err != GA_NO_ERROR){ if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "blockCumOp call failed"); PyErr_SetString(PyExc_RuntimeError, "blockCumOp call failed");
return -1; return -1;
...@@ -421,7 +421,7 @@ class GpuCumOp(GpuKernelBase, Op): ...@@ -421,7 +421,7 @@ class GpuCumOp(GpuKernelBase, Op):
(void*) &offsetY, (void*) &offsetY,
(void*) &offsetZ (void*) &offsetZ
}; };
int err = GpuKernel_call(&k_finalCumOp_%(nodename)s, 3, dimBlock, dimGrid, sharedBytes, kernel_params); int err = GpuKernel_call(&k_finalCumOp_%(nodename)s, 3, dimGrid, dimBlock, sharedBytes, kernel_params);
if (err != GA_NO_ERROR){ if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "finalCumOp call failed"); PyErr_SetString(PyExc_RuntimeError, "finalCumOp call failed");
return -1; return -1;
...@@ -446,7 +446,7 @@ class GpuCumOp(GpuKernelBase, Op): ...@@ -446,7 +446,7 @@ class GpuCumOp(GpuKernelBase, Op):
(void*) &(tmp0), (void*) &(tmp0),
(void*) &(tmp1) (void*) &(tmp1)
}; };
int err = GpuKernel_call(&k_cumadd_%(nodename)s, 3, dimBlock, dimGrid, sharedBytes, kernel_params); int err = GpuKernel_call(&k_cumadd_%(nodename)s, 3, dimGrid, dimBlock, sharedBytes, kernel_params);
if (err != GA_NO_ERROR){ if (err != GA_NO_ERROR){
PyErr_SetString(PyExc_RuntimeError, "cumadd call failed"); PyErr_SetString(PyExc_RuntimeError, "cumadd call failed");
return -1; return -1;
......
...@@ -224,7 +224,7 @@ int gemm16(PyGpuArrayObject *C, float alpha, ...@@ -224,7 +224,7 @@ int gemm16(PyGpuArrayObject *C, float alpha,
params[11] = &beta; params[11] = &beta;
params[12] = &flags; params[12] = &flags;
if (GpuKernel_call(gk, 2, threads, grid, 0, params) != GA_NO_ERROR) { if (GpuKernel_call(gk, 2, grid, threads, 0, params) != GA_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "error in gemm16 kernel call"); PyErr_SetString(PyExc_RuntimeError, "error in gemm16 kernel call");
res = 1; res = 1;
} }
......
...@@ -205,7 +205,7 @@ KERNEL void k_multi_warp_multinomial( ...@@ -205,7 +205,7 @@ KERNEL void k_multi_warp_multinomial(
args[8] = (void*)&strides[3]; args[8] = (void*)&strides[3];
args[9] = (void*)&strides[4]; args[9] = (void*)&strides[4];
err = GpuKernel_call(&%(kname)s, 1, &nb_threads, &nb_blocks, 0, args); err = GpuKernel_call(&%(kname)s, 1, &nb_blocks, &nb_threads, 0, args);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format( PyErr_Format(
PyExc_RuntimeError, PyExc_RuntimeError,
...@@ -455,7 +455,7 @@ KERNEL void k_multi_warp_multinomial_wor( ...@@ -455,7 +455,7 @@ KERNEL void k_multi_warp_multinomial_wor(
nb_blocks2[0] = nb_blocks; nb_blocks2[0] = nb_blocks;
nb_blocks2[1] = 1; nb_blocks2[1] = 1;
err = GpuKernel_call(&%(kname)s, 2, nb_threads2, nb_blocks2, 0, args); err = GpuKernel_call(&%(kname)s, 2, nb_blocks2, nb_threads2, 0, args);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format( PyErr_Format(
PyExc_RuntimeError, PyExc_RuntimeError,
......
...@@ -470,7 +470,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -470,7 +470,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
(void *)&stride_Z1, (void *)&stride_Z1,
(void *)%(z)s->ga.data, (void *)%(z)s->ga.data,
(void *)&%(z)s->ga.offset}; (void *)&%(z)s->ga.offset};
err = GpuKernel_call(fptr, 3, threads_per_block, n_blocks, 0, kernel_params); err = GpuKernel_call(fptr, 3, n_blocks, threads_per_block, 0, kernel_params);
%(err_check)s %(err_check)s
%(sync)s %(sync)s
} // END NESTED SCOPE } // END NESTED SCOPE
......
...@@ -411,7 +411,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op): ...@@ -411,7 +411,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
(void *)&stride_YIDX0, (void *)&stride_YIDX0,
(void *)%(dx)s->ga.data, (void *)&%(dx)s->ga.offset, (void *)%(dx)s->ga.data, (void *)&%(dx)s->ga.offset,
(void *)&stride_DX0, (void *)&stride_DX1}; (void *)&stride_DX0, (void *)&stride_DX1};
int err = GpuKernel_call(&%(k_var)s, 3, threads_per_block, n_blocks, 0, kernel_params); int err = GpuKernel_call(&%(k_var)s, 3, n_blocks, threads_per_block, 0, kernel_params);
%(err_check)s %(err_check)s
%(sync)s %(sync)s
} }
...@@ -587,13 +587,13 @@ class GpuSoftmax(GpuKernelBase, Op): ...@@ -587,13 +587,13 @@ class GpuSoftmax(GpuKernelBase, Op):
//TODO: read the information from the card. //TODO: read the information from the card.
if(shmem_sz < (32 * 1024 - 500)){ if(shmem_sz < (32 * 1024 - 500)){
err = GpuKernel_call(&kSoftmax_%(nodename)s, 3, err = GpuKernel_call(&kSoftmax_%(nodename)s, 3,
threads_per_block, n_blocks, shmem_sz, n_blocks, threads_per_block, shmem_sz,
kernel_params); kernel_params);
fmt_str = "gpuarray error: kSoftmax_%(nodename)s: %%s"; fmt_str = "gpuarray error: kSoftmax_%(nodename)s: %%s";
msg = GpuKernel_error(&kSoftmax_%(nodename)s, err); msg = GpuKernel_error(&kSoftmax_%(nodename)s, err);
}else{ }else{
err = GpuKernel_call(&kSoftmax_fixed_shared%(nodename)s, 3, err = GpuKernel_call(&kSoftmax_fixed_shared%(nodename)s, 3,
threads_per_block, n_blocks, n_blocks, threads_per_block,
threads_per_block[0] * sizeof(npy_%(work_x)s), threads_per_block[0] * sizeof(npy_%(work_x)s),
kernel_params); kernel_params);
fmt_str = "gpuarray error: kSoftmax_fixed_shared%(nodename)s: %%s"; fmt_str = "gpuarray error: kSoftmax_fixed_shared%(nodename)s: %%s";
...@@ -801,13 +801,13 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op): ...@@ -801,13 +801,13 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
{ {
if(shmem_sz < (32 * 1024 - 500)){ if(shmem_sz < (32 * 1024 - 500)){
err = GpuKernel_call(&kSoftmaxWithBias_%(nodename)s, 3, err = GpuKernel_call(&kSoftmaxWithBias_%(nodename)s, 3,
threads_per_block, n_blocks, shmem_sz, n_blocks, threads_per_block, shmem_sz,
kernel_params); kernel_params);
fmt_str = "gpuarray error: kSoftmaxWithBias_%(nodename)s: %%s"; fmt_str = "gpuarray error: kSoftmaxWithBias_%(nodename)s: %%s";
msg = GpuKernel_error(&kSoftmaxWithBias_%(nodename)s, err); msg = GpuKernel_error(&kSoftmaxWithBias_%(nodename)s, err);
}else{ }else{
err = GpuKernel_call(&kSoftmaxWithBias_fixed_shared%(nodename)s, err = GpuKernel_call(&kSoftmaxWithBias_fixed_shared%(nodename)s,
3, threads_per_block, n_blocks, 3, n_blocks, threads_per_block,
threads_per_block[0] * sizeof(npy_%(work_x)s), threads_per_block[0] * sizeof(npy_%(work_x)s),
kernel_params); kernel_params);
fmt_str = "gpuarray error: kSoftmaxWithBias_fixed_shared%(nodename)s: %%s"; fmt_str = "gpuarray error: kSoftmaxWithBias_fixed_shared%(nodename)s: %%s";
......
...@@ -1055,7 +1055,7 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) { ...@@ -1055,7 +1055,7 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) {
(void *)&indices_arr->ga.offset, (void *)&indices_arr->ga.offset,
(void *)&set_instead_of_inc, (void *)&set_instead_of_inc,
(void *)errbuf}; (void *)errbuf};
err = GpuKernel_call(&%(k_var)s, 3, threads_per_block, n_blocks, 0, kernel_params); err = GpuKernel_call(&%(k_var)s, 3, n_blocks, threads_per_block, 0, kernel_params);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"gpuarray error: %(k_var)s: %%s.", "gpuarray error: %(k_var)s: %%s.",
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论