Make CudaNdarray setitem able to broadcast the assigned value along its size-1 dimensions (new `unbroadcast` flag).

8aeb40a8 · Frederic Bastien · e6432acb · 8aeb40a8 · 8aeb40a8 · 8aeb40a8
--- a/theano/sandbox/cuda/cuda_ndarray.cu
+++ b/theano/sandbox/cuda/cuda_ndarray.cu
@@ -1428,7 +1428,7 @@ CudaNdarray_setitem(PyObject *o, PyObject  *key, PyObject  *v)
        return -1;
    }
-    if(CudaNdarray_CopyFromCudaNdarray(rval, (CudaNdarray*)v))
+    if(CudaNdarray_CopyFromCudaNdarray(rval, (CudaNdarray*)v, true))
    {
        Py_DECREF(viewCopyForComparison);
        Py_DECREF((PyObject*)rval);
@@ -2045,7 +2045,7 @@ static __global__ void k_copy_1d(const int N, const float * x, const int sx, flo
 }
 //copy from other into self
-int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other)
+int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other, bool unbroadcast)
 {
    int verbose = 0;
    //standard elemwise size checks
@@ -2063,7 +2063,8 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other)
    unsigned int size = 1;
    for (int i = 0; i< self->nd; ++i)
    {
-        if (CudaNdarray_HOST_DIMS(self)[i] != CudaNdarray_HOST_DIMS(other)[i])
+        if ((CudaNdarray_HOST_DIMS(self)[i] != CudaNdarray_HOST_DIMS(other)[i]) 
+	    && (1!=CudaNdarray_HOST_DIMS(other)[i] || !unbroadcast) )
        {
 	  PyErr_Format(PyExc_TypeError, "need same dimensions for dim %d, destination=%d, source=%d",
 		       i, CudaNdarray_HOST_DIMS(self)[i], CudaNdarray_HOST_DIMS(other)[i]);
@@ -2123,11 +2124,14 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other)
                // call worker routine
                unsigned int n_blocks = std::min(size, (unsigned int)NUM_VECTOR_OP_BLOCKS);
                unsigned int threads_per_block = std::min(ceil_intdiv(size, n_blocks), (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
+		CudaNdarray * cuda_dims = other;
+		if(unbroadcast)
+		  cuda_dims = self;
                //copy from other into self
                k_elemwise_unary_rowmajor_copy<<<n_blocks, threads_per_block>>>(
                        size, 
                        (unsigned int)other->nd,
-                        (const int *)CudaNdarray_DEV_DIMS(other),
+                        (const int *)CudaNdarray_DEV_DIMS(cuda_dims),
                        (const float*)CudaNdarray_DEV_DATA(other), (const int *)CudaNdarray_DEV_STRIDES(other),
                        CudaNdarray_DEV_DATA(self),  (const int *)CudaNdarray_DEV_STRIDES(self));
                CNDA_THREAD_SYNC;

--- a/theano/sandbox/cuda/cuda_ndarray.cuh
+++ b/theano/sandbox/cuda/cuda_ndarray.cuh
@@ -459,7 +459,7 @@ int CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj);
 *
 * self is reallocated to have the correct dimensions if necessary.
 */
-int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other);
+int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other, bool unbroadcast = false);
 /**
 * Transfer the contents of CudaNdarray `self` to a new numpy ndarray.

--- a/theano/sandbox/cuda/tests/test_cuda_ndarray.py
+++ b/theano/sandbox/cuda/tests/test_cuda_ndarray.py
@@ -407,6 +407,23 @@ def test_setitem_matrix_tensor3():
    assert numpy.all(numpy.asarray(_a[:,1,1]) == b)
+def test_setitem_matrix_bad_shape():
+    a = numpy.arange(27)
+    a.resize((3,3,3))
+    a = theano._asarray(a, dtype='float32')
+    _a = cuda_ndarray.CudaNdarray(a)
+    b = theano._asarray([7,8], dtype='float32')
+    _b = cuda_ndarray.CudaNdarray(b)
+    try:
+        # attempt to assign _b, whose shape does not match the target slice; setitem must raise
+        _a[:,:,1] = _b
+        assert False
+    except TypeError, e:
+        #print e
+        assert True
 def test_setitem_assign_to_slice():
    a = numpy.arange(27)
    a.resize((3,3,3))
@@ -425,9 +442,7 @@ def test_setitem_assign_to_slice():
    assert numpy.all(numpy.asarray(_a[:,1,1]) == b)
+def test_setitem_broadcast():
-# this fails for the moment
-def test_setitem_broadcast_must_fail():
    a = numpy.arange(27)
    a.resize((3,3,3))
    a = theano._asarray(a, dtype='float32')
@@ -435,16 +450,15 @@ def test_setitem_broadcast_must_fail():
    b = theano._asarray([7,8,9], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)
+    _a[:,:,1] = _b.reshape((1,3))
-    try:
+    a[:,:,1] = b.reshape((1,3))
-        # attempt to assign vector to all rows of this submatrix
+    assert numpy.allclose(numpy.asarray(_a),a)
-        _a[:,:,1] = _b
-        assert False
-    except TypeError:
-        assert True
 # this also fails for the moment
 def test_setitem_rightvalue_ndarray_fails():
+    """
+    Now we don't automatically add dimensions to broadcast
+    """
    a = numpy.arange(27)
    a.resize((3,3,3))
    a = theano._asarray(a, dtype='float32')