提交 e4e88af1 作者: Frederic Bastien

Make CudaNdarray_inplace_add and CudaNdarray_add work with tensors of 0 elements, and test them.

上级 6664a048
...@@ -786,6 +786,10 @@ CudaNdarray_add(PyObject* py_self, PyObject * py_other) ...@@ -786,6 +786,10 @@ CudaNdarray_add(PyObject* py_self, PyObject * py_other)
return NULL; return NULL;
} }
if(CudaNdarray_SIZE((CudaNdarray *)py_self)==0 && CudaNdarray_SIZE((CudaNdarray *)py_other)==0){
return (PyObject *) rval;
}
int threads_per_block = std::min(size, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK); int threads_per_block = std::min(size, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
int n_blocks = std::min(ceil_intdiv(size,(unsigned int)threads_per_block), (unsigned int)NUM_VECTOR_OP_BLOCKS); int n_blocks = std::min(ceil_intdiv(size,(unsigned int)threads_per_block), (unsigned int)NUM_VECTOR_OP_BLOCKS);
kAdd_contiguous<<<n_blocks,threads_per_block>>>( kAdd_contiguous<<<n_blocks,threads_per_block>>>(
...@@ -874,6 +878,11 @@ CudaNdarray_inplace_add(PyObject* py_self, PyObject * py_other) ...@@ -874,6 +878,11 @@ CudaNdarray_inplace_add(PyObject* py_self, PyObject * py_other)
size *= (unsigned int) CudaNdarray_HOST_DIMS(self)[i]; size *= (unsigned int) CudaNdarray_HOST_DIMS(self)[i];
} }
if(CudaNdarray_SIZE((CudaNdarray *)py_self)==0 && CudaNdarray_SIZE((CudaNdarray *)py_other)==0){
Py_INCREF(py_self);
return py_self;
}
switch(self->nd) switch(self->nd)
{ {
case 1: case 1:
......
...@@ -16,7 +16,7 @@ def test_host_to_device(): ...@@ -16,7 +16,7 @@ def test_host_to_device():
assert numpy.all(a == c) assert numpy.all(a == c)
def test_add(): def test_add():
for shape in ((), (3,), (2,3), (1,10000000),(10,1000000), (100,100000),(1000,10000),(10000,1000)): for shape in ((), (0,), (3,), (2,3), (1,10000000),(10,1000000), (100,100000),(1000,10000),(10000,1000)):
a0 = theano._asarray(numpy.random.rand(*shape), dtype='float32') a0 = theano._asarray(numpy.random.rand(*shape), dtype='float32')
a1 = a0.copy() a1 = a0.copy()
b0 = cuda_ndarray.CudaNdarray(a0) b0 = cuda_ndarray.CudaNdarray(a0)
...@@ -34,6 +34,13 @@ def test_add(): ...@@ -34,6 +34,13 @@ def test_add():
print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', cpu_dt / gpu_dt print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', cpu_dt / gpu_dt
assert numpy.allclose(asum, numpy.asarray(bsum)) assert numpy.allclose(asum, numpy.asarray(bsum))
if len(shape)>0:
#test inplace version, not implemented with 0 dims
b0 += b1
a0 += a1
assert numpy.allclose(a0, numpy.asarray(b0))
assert numpy.allclose(a0,a1*2)
if len(shape)==2: if len(shape)==2:
#test not contiguous version. #test not contiguous version.
#should raise not implemented. #should raise not implemented.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论