提交 be4d06b0 authored 作者: Frederic Bastien's avatar Frederic Bastien

'Implemented CudaNdarray.i{add,div} for ndim==0 and tested it.'

上级 76357631
...@@ -1048,6 +1048,31 @@ CudaNdarray_inplace_add_div(PyObject* py_self, PyObject * py_other, int fct_nb) ...@@ -1048,6 +1048,31 @@ CudaNdarray_inplace_add_div(PyObject* py_self, PyObject * py_other, int fct_nb)
switch(self->nd) switch(self->nd)
{ {
case 0:
{
dim3 n_blocks(1, 1, 1);
dim3 n_threads(1);
k_iop_3<<<n_blocks, n_threads>>>(1,
1, //CudaNdarray_HOST_DIMS(self)[0],
1, //CudaNdarray_HOST_DIMS(self)[0],
CudaNdarray_DEV_DATA(self),
1,
1, //CudaNdarray_HOST_STRIDES(self)[0],
CudaNdarray_HOST_STRIDES(self)[0],
CudaNdarray_DEV_DATA(other),
1,
1, //CudaNdarray_HOST_STRIDES(other)[0],
CudaNdarray_HOST_STRIDES(other)[0]);
CNDA_THREAD_SYNC;
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
{
PyErr_Format(PyExc_RuntimeError, "Cuda error: %s: %s.\n", "k_iop_3", cudaGetErrorString(err));
return NULL;
}
Py_INCREF(py_self);
return py_self;
}
case 1: case 1:
{ {
dim3 n_blocks(1, 1, 1); dim3 n_blocks(1, 1, 1);
......
...@@ -59,8 +59,7 @@ def test_add_iadd_idiv(): ...@@ -59,8 +59,7 @@ def test_add_iadd_idiv():
print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt) print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
assert numpy.allclose(asum, numpy.asarray(bsum)) assert numpy.allclose(asum, numpy.asarray(bsum))
if len(shape)>0: # test inplace version
#test inplace version, not implemented with 0 dims
b0 += b1 b0 += b1
a0 += a1 a0 += a1
assert numpy.allclose(a0, numpy.asarray(b0)) assert numpy.allclose(a0, numpy.asarray(b0))
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论