提交 e4e88af1 作者: Frederic Bastien

Make CudaNdarray_inplace_add and CudaNdarray_add work with tensors of 0 elements, and test them.

上级 6664a048
......@@ -786,6 +786,10 @@ CudaNdarray_add(PyObject* py_self, PyObject * py_other)
return NULL;
}
if(CudaNdarray_SIZE((CudaNdarray *)py_self)==0 && CudaNdarray_SIZE((CudaNdarray *)py_other)==0){
return (PyObject *) rval;
}
int threads_per_block = std::min(size, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
int n_blocks = std::min(ceil_intdiv(size,(unsigned int)threads_per_block), (unsigned int)NUM_VECTOR_OP_BLOCKS);
kAdd_contiguous<<<n_blocks,threads_per_block>>>(
......@@ -873,7 +877,12 @@ CudaNdarray_inplace_add(PyObject* py_self, PyObject * py_other)
}
size *= (unsigned int) CudaNdarray_HOST_DIMS(self)[i];
}
if(CudaNdarray_SIZE((CudaNdarray *)py_self)==0 && CudaNdarray_SIZE((CudaNdarray *)py_other)==0){
Py_INCREF(py_self);
return py_self;
}
switch(self->nd)
{
case 1:
......
......@@ -16,7 +16,7 @@ def test_host_to_device():
assert numpy.all(a == c)
def test_add():
for shape in ((), (3,), (2,3), (1,10000000),(10,1000000), (100,100000),(1000,10000),(10000,1000)):
for shape in ((), (0,), (3,), (2,3), (1,10000000),(10,1000000), (100,100000),(1000,10000),(10000,1000)):
a0 = theano._asarray(numpy.random.rand(*shape), dtype='float32')
a1 = a0.copy()
b0 = cuda_ndarray.CudaNdarray(a0)
......@@ -34,6 +34,13 @@ def test_add():
print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', cpu_dt / gpu_dt
assert numpy.allclose(asum, numpy.asarray(bsum))
if len(shape)>0:
#test inplace version, not implemented with 0 dims
b0 += b1
a0 += a1
assert numpy.allclose(a0, numpy.asarray(b0))
assert numpy.allclose(a0,a1*2)
if len(shape)==2:
#test not contiguous version.
#should raise not implemented.
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论