提交 8aeb40a8 authored 作者: Frederic Bastien's avatar Frederic Bastien

Make the CudaNdarray setitem function able to unbroadcast a value.

上级 e6432acb
...@@ -1428,7 +1428,7 @@ CudaNdarray_setitem(PyObject *o, PyObject *key, PyObject *v) ...@@ -1428,7 +1428,7 @@ CudaNdarray_setitem(PyObject *o, PyObject *key, PyObject *v)
return -1; return -1;
} }
if(CudaNdarray_CopyFromCudaNdarray(rval, (CudaNdarray*)v)) if(CudaNdarray_CopyFromCudaNdarray(rval, (CudaNdarray*)v, true))
{ {
Py_DECREF(viewCopyForComparison); Py_DECREF(viewCopyForComparison);
Py_DECREF((PyObject*)rval); Py_DECREF((PyObject*)rval);
...@@ -2045,7 +2045,7 @@ static __global__ void k_copy_1d(const int N, const float * x, const int sx, flo ...@@ -2045,7 +2045,7 @@ static __global__ void k_copy_1d(const int N, const float * x, const int sx, flo
} }
//copy from other into self //copy from other into self
int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other) int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other, bool unbroadcast)
{ {
int verbose = 0; int verbose = 0;
//standard elemwise size checks //standard elemwise size checks
...@@ -2063,7 +2063,8 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other) ...@@ -2063,7 +2063,8 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other)
unsigned int size = 1; unsigned int size = 1;
for (int i = 0; i< self->nd; ++i) for (int i = 0; i< self->nd; ++i)
{ {
if (CudaNdarray_HOST_DIMS(self)[i] != CudaNdarray_HOST_DIMS(other)[i]) if ((CudaNdarray_HOST_DIMS(self)[i] != CudaNdarray_HOST_DIMS(other)[i])
&& (1!=CudaNdarray_HOST_DIMS(other)[i] || !unbroadcast) )
{ {
PyErr_Format(PyExc_TypeError, "need same dimensions for dim %d, destination=%d, source=%d", PyErr_Format(PyExc_TypeError, "need same dimensions for dim %d, destination=%d, source=%d",
i, CudaNdarray_HOST_DIMS(self)[i], CudaNdarray_HOST_DIMS(other)[i]); i, CudaNdarray_HOST_DIMS(self)[i], CudaNdarray_HOST_DIMS(other)[i]);
...@@ -2123,11 +2124,14 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other) ...@@ -2123,11 +2124,14 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other)
// call worker routine // call worker routine
unsigned int n_blocks = std::min(size, (unsigned int)NUM_VECTOR_OP_BLOCKS); unsigned int n_blocks = std::min(size, (unsigned int)NUM_VECTOR_OP_BLOCKS);
unsigned int threads_per_block = std::min(ceil_intdiv(size, n_blocks), (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK); unsigned int threads_per_block = std::min(ceil_intdiv(size, n_blocks), (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
CudaNdarray * cuda_dims = other;
if(unbroadcast)
cuda_dims = self;
//copy from other into self //copy from other into self
k_elemwise_unary_rowmajor_copy<<<n_blocks, threads_per_block>>>( k_elemwise_unary_rowmajor_copy<<<n_blocks, threads_per_block>>>(
size, size,
(unsigned int)other->nd, (unsigned int)other->nd,
(const int *)CudaNdarray_DEV_DIMS(other), (const int *)CudaNdarray_DEV_DIMS(cuda_dims),
(const float*)CudaNdarray_DEV_DATA(other), (const int *)CudaNdarray_DEV_STRIDES(other), (const float*)CudaNdarray_DEV_DATA(other), (const int *)CudaNdarray_DEV_STRIDES(other),
CudaNdarray_DEV_DATA(self), (const int *)CudaNdarray_DEV_STRIDES(self)); CudaNdarray_DEV_DATA(self), (const int *)CudaNdarray_DEV_STRIDES(self));
CNDA_THREAD_SYNC; CNDA_THREAD_SYNC;
......
...@@ -459,7 +459,7 @@ int CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj); ...@@ -459,7 +459,7 @@ int CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj);
* *
* self is reallocated to have the correct dimensions if necessary. * self is reallocated to have the correct dimensions if necessary.
*/ */
int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other); int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other, bool unbroadcast = false);
/** /**
* Transfer the contents of CudaNdarray `self` to a new numpy ndarray. * Transfer the contents of CudaNdarray `self` to a new numpy ndarray.
......
...@@ -407,6 +407,23 @@ def test_setitem_matrix_tensor3(): ...@@ -407,6 +407,23 @@ def test_setitem_matrix_tensor3():
assert numpy.all(numpy.asarray(_a[:,1,1]) == b) assert numpy.all(numpy.asarray(_a[:,1,1]) == b)
def test_setitem_matrix_bad_shape():
    """Check that setitem rejects a source whose shape does not fit the target.

    A 3x3x3 float32 CudaNdarray is built, and a 2-element CudaNdarray is
    assigned to the slice ``[:,:,1]``. The assignment is expected to raise
    TypeError; the test fails if no exception is raised.
    """
    a = numpy.arange(27)
    a.resize((3,3,3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7,8], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    try:
        # attempt to assign the 2-element CudaNdarray _b with setitem
        _a[:,:,1] = _b
    except TypeError:
        # expected outcome: the shapes are incompatible
        pass
    else:
        # Kept outside the try body so the failure is clearly separate
        # from the exception being tested.
        assert False, "setitem with an incompatible shape must raise TypeError"
def test_setitem_assign_to_slice(): def test_setitem_assign_to_slice():
a = numpy.arange(27) a = numpy.arange(27)
a.resize((3,3,3)) a.resize((3,3,3))
...@@ -425,9 +442,7 @@ def test_setitem_assign_to_slice(): ...@@ -425,9 +442,7 @@ def test_setitem_assign_to_slice():
assert numpy.all(numpy.asarray(_a[:,1,1]) == b) assert numpy.all(numpy.asarray(_a[:,1,1]) == b)
def test_setitem_broadcast():
# this fails for the moment
def test_setitem_broadcast_must_fail():
a = numpy.arange(27) a = numpy.arange(27)
a.resize((3,3,3)) a.resize((3,3,3))
a = theano._asarray(a, dtype='float32') a = theano._asarray(a, dtype='float32')
...@@ -435,16 +450,15 @@ def test_setitem_broadcast_must_fail(): ...@@ -435,16 +450,15 @@ def test_setitem_broadcast_must_fail():
b = theano._asarray([7,8,9], dtype='float32') b = theano._asarray([7,8,9], dtype='float32')
_b = cuda_ndarray.CudaNdarray(b) _b = cuda_ndarray.CudaNdarray(b)
_a[:,:,1] = _b.reshape((1,3))
try: a[:,:,1] = b.reshape((1,3))
# attempt to assign vector to all rows of this submatrix assert numpy.allclose(numpy.asarray(_a),a)
_a[:,:,1] = _b
assert False
except TypeError:
assert True
# this also fails for the moment # this also fails for the moment
def test_setitem_rightvalue_ndarray_fails(): def test_setitem_rightvalue_ndarray_fails():
"""
Now we don't automatically add dimensions to broadcast
"""
a = numpy.arange(27) a = numpy.arange(27)
a.resize((3,3,3)) a.resize((3,3,3))
a = theano._asarray(a, dtype='float32') a = theano._asarray(a, dtype='float32')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论