提交 be4d06b0 作者: Frederic Bastien

'Implemented CudaNdarray.i{add,div} for ndim==0 and tested it.'

上级 76357631
......@@ -1048,6 +1048,31 @@ CudaNdarray_inplace_add_div(PyObject* py_self, PyObject * py_other, int fct_nb)
switch(self->nd)
{
case 0:
{
dim3 n_blocks(1, 1, 1);
dim3 n_threads(1);
k_iop_3<<<n_blocks, n_threads>>>(1,
1, //CudaNdarray_HOST_DIMS(self)[0],
1, //CudaNdarray_HOST_DIMS(self)[0],
CudaNdarray_DEV_DATA(self),
1,
1, //CudaNdarray_HOST_STRIDES(self)[0],
CudaNdarray_HOST_STRIDES(self)[0],
CudaNdarray_DEV_DATA(other),
1,
1, //CudaNdarray_HOST_STRIDES(other)[0],
CudaNdarray_HOST_STRIDES(other)[0]);
CNDA_THREAD_SYNC;
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
{
PyErr_Format(PyExc_RuntimeError, "Cuda error: %s: %s.\n", "k_iop_3", cudaGetErrorString(err));
return NULL;
}
Py_INCREF(py_self);
return py_self;
}
case 1:
{
dim3 n_blocks(1, 1, 1);
......
......@@ -59,17 +59,16 @@ def test_add_iadd_idiv():
print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
assert numpy.allclose(asum, numpy.asarray(bsum))
if len(shape)>0:
#test inplace version, not implemented with 0 dims
b0 += b1
a0 += a1
assert numpy.allclose(a0, numpy.asarray(b0))
assert numpy.allclose(a0,a1*2)
b0 /= b1
a0 /= a1
assert numpy.allclose(a0, numpy.asarray(b0))
assert numpy.allclose(a0,numpy.ones(a0.shape)*2)
# test inplace version
b0 += b1
a0 += a1
assert numpy.allclose(a0, numpy.asarray(b0))
assert numpy.allclose(a0,a1*2)
b0 /= b1
a0 /= a1
assert numpy.allclose(a0, numpy.asarray(b0))
assert numpy.allclose(a0,numpy.ones(a0.shape)*2)
if len(shape)==2:
#test not contiguous version.
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论