提交 3a4e6c78 authored 作者: lamblin's avatar lamblin

Merge pull request #582 from nouiz/gpu_stuff

Gpu stuff
......@@ -53,6 +53,10 @@ New Features
(Frederic B., Simon McGregor)
* MRG random now raises an error with a clear message when the passed shape
contains dimensions with bad value like 0. (Frédéric B. reported by Ian G.)
* "CudaNdarray[*] = ndarray" works in more cases (Frederic B.)
* "CudaNdarray[*] += ndarray" works in more cases (Frederic B.)
* We add dimensions to CudaNdarray to automatically broadcast more frequently.
(Frederic B.)
Sparse
* Implement theano.sparse.mul(sparse1, sparse2) when both inputs don't
......
......@@ -1026,13 +1026,11 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
for r in node.outputs:
if isinstance(r.type, (TensorType, CudaNdarrayType)):
# Build a C-contiguous buffer
new_buf = numpy.zeros(
shape=r_vals[r].shape,
dtype=r_vals[r].dtype,
order='C')
new_buf += def_val
if isinstance(r.type, CudaNdarrayType):
new_buf = CudaNdarray(new_buf)
new_buf = r.type.value_zeros(r_vals[r].shape)
# CudaNdarray objects do not have a flags field
# assert new_buf.flags["C_CONTIGUOUS"]
new_buf += numpy.asarray(def_val).astype(r.type.dtype)
c_cont_outputs[r] = new_buf
if len(c_cont_outputs):
......@@ -1096,21 +1094,12 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
shapes.append(slice(None, size, None))
r_buf = init_strided[r]
if r_buf.ndim > 0:
r_buf = r_buf[tuple(strides)][tuple(shapes)]
assert r_buf.shape == r_vals[r].shape
if isinstance(r.type, CudaNdarrayType):
# It seems stupid, but we need to allocate a
# new ndarray and copy it into the GPU one.
# TODO: When it is possible to simply do
# r_buff[...] = def_val, do so.
new_rbuf = numpy.zeros(r_vals[r].shape,
dtype=r.dtype)
new_rbuf += def_val
r_buf[...] = CudaNdarray(new_rbuf)
else:
r_buf[...] = def_val
r_buf[...] = numpy.asarray(def_val).astype(r_buf.dtype)
strided[r] = r_buf
......@@ -1133,12 +1122,8 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
out_shape = [max((s + sd), 0)
for s, sd in zip(r_vals[r].shape,
r_shape_diff)]
new_buf = numpy.zeros(
shape=out_shape,
dtype=r.dtype)
new_buf += def_val
if isinstance(r.type, CudaNdarrayType):
new_buf = CudaNdarray(new_buf)
new_buf = r.type.value_zeros(r_vals[r].shape)
new_buf += numpy.asarray(def_val).astype(r.type.dtype)
wrong_size[r] = new_buf
yield (name, wrong_size)
......
......@@ -1923,10 +1923,6 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
ilist_ = tensor.as_tensor_variable(ilist)
assert x_.type.dtype == y_.type.dtype
assert x_.type.ndim == y_.type.ndim
# if (x_.type.ndim - 1) > y_.type.ndim:
# y_ = tensor.shape_padleft(y_, x_.type.ndim - y_.type.ndim)
# assert x_.type.ndim == y_.type.ndim
assert x_.type.ndim >= y_.type.ndim
if ilist_.type.dtype[:3] not in ('int', 'uin'):
......@@ -1941,9 +1937,40 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
return Apply(self, [x_, y_, ilist_], [x_.type()])
#def perform(self, node, inp, out_):
# CudaNdarray_Subscript() don't support Advanced slicing.
# so we use the parent version that loop on each indices.
# CudaNdarray_Subscript() does not support advanced slicing.
# We also cannot reuse the parent version, which loops over each index,
# because we additionally need to loop when set_instead_of_inc is True,
# and the parent does not loop in that case.
def perform(self, node, inp, out_):
# TODO opt to make this inplace
x, y, idx = inp
out, = out_
if not self.inplace:
x = x.copy()
if self.set_instead_of_inc:
# CudaNdarray __setitem__ don't do broadcast nor support
# list of index.
assert y.ndim <= x.ndim # Should be guaranteed by `make_node`
if y.ndim == x.ndim:
assert len(y) == len(idx)
for (j, i) in enumerate(idx):
x[i] = y[j]
else:
for i in idx:
x[i] = y
else:
# If `y` has as many dimensions as `x`, then we want to iterate
# jointly on `x` and `y`. Otherwise, it means `y` should be
# broadcasted to fill all relevant rows of `x`.
assert y.ndim <= x.ndim # Should be guaranteed by `make_node`
if y.ndim == x.ndim:
assert len(y) == len(idx)
for (j, i) in enumerate(idx):
x[i] += y[j]
else:
for i in idx:
x[i] += y
out[0] = x
class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
......
......@@ -663,7 +663,7 @@ PyObject * CudaNdarray_Reshape(CudaNdarray * self, PyObject * shape)
return (PyObject*)rval;
}
PyObject * CudaNdarray_View(CudaNdarray * self)
PyObject * CudaNdarray_View(const CudaNdarray * self)
{
CudaNdarray * rval = (CudaNdarray*)CudaNdarray_New(self->nd);
if (!rval || CudaNdarray_set_device_data(rval, CudaNdarray_DEV_DATA(self), self))
......@@ -985,11 +985,19 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
"CudaNdarray_inplace_elemwise need a CudaNdarray on left");
return -1;
}
CudaNdarray * new_other = NULL;
if (!CudaNdarray_Check(py_other)) {
PyErr_SetString(
PyExc_TypeError,
"CudaNdarray_inplace_elemwise need a CudaNdarray on right");
return -1;
new_other = (CudaNdarray*) CudaNdarray_New();
if(!new_other)
{
return -1;
}
if(CudaNdarray_CopyFromArray(new_other, (PyArrayObject *) py_other))
{
Py_XDECREF(new_other);
return -1;
}
py_other = (PyObject *) new_other;
}
CudaNdarray * self = (CudaNdarray *)py_self;
......@@ -1010,6 +1018,7 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
"CudaNdarray_inplace_elemwise: The destination need more or the"
" same number of dimensions then the source. Got %d and %d.",
self->nd, other->nd);
Py_XDECREF(new_other);
return -1;
}
......@@ -1040,6 +1049,7 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
PyErr_SetString(
PyExc_ValueError,
"CudaNdarray_inplace_elemwise need same dimensions (or broadcastable dimension)");
Py_XDECREF(new_other);
return -1;
}
// if we're broadcasting other, then make sure it has stride 0
......@@ -1050,13 +1060,18 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
if (size==0)
{
if (CudaNdarray_SIZE((CudaNdarray *)py_other))
int other_size = CudaNdarray_SIZE((CudaNdarray *)py_other);
if (!(other_size == 0 || other_size == 1))
{
PyErr_SetString(
PyExc_ValueError,
"CudaNdarray_inplace_elemwise cannot work inplace on an un-initialized array");
"CudaNdarray_inplace_elemwise cannot work inplace on"
" un-initialized array when the new value have more then"
" 0 or 1 broadcastable dimensions");
Py_XDECREF(new_other);
return 0;
}
Py_XDECREF(new_other);
return 0;
}
......@@ -1087,6 +1102,7 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
"Cuda error: %s: %s.\n",
"k3",
cudaGetErrorString(err));
Py_XDECREF(new_other);
return -1;
}
}
......@@ -1119,6 +1135,7 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
"Cuda error: %s: %s.\n",
"k3",
cudaGetErrorString(err));
Py_XDECREF(new_other);
return -1;
}
}
......@@ -1156,6 +1173,7 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
"Cuda error: %s: %s.\n",
"k3",
cudaGetErrorString(err));
Py_XDECREF(new_other);
return -1;
}
}
......@@ -1196,6 +1214,7 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
"Cuda error: %s: %s.\n",
"k3",
cudaGetErrorString(err));
Py_XDECREF(new_other);
return -1;
}
}
......@@ -1240,6 +1259,7 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
"Cuda error: %s: %s.\n",
"k4",
cudaGetErrorString(err));
Py_XDECREF(new_other);
return -1;
}
}
......@@ -1285,6 +1305,7 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
"Cuda error: %s: %s.\n",
"k4",
cudaGetErrorString(err));
Py_XDECREF(new_other);
return -1;
}
}
......@@ -1296,11 +1317,13 @@ CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t
PyExc_NotImplementedError,
"inplace_elemwise w nd=%i\n",
self->nd);
Py_XDECREF(new_other);
return -1;
}
}
if (verbose)
fprintf(stderr, "INPLACE ADD/DIV end\n");
Py_XDECREF(new_other);
return 0;
}
......@@ -1654,7 +1677,7 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
// See http://docs.python.org/dev/py3k/c-api/object.html#PyObject_SetItem
// Doesn't handle broadcasting, e.g. a[:] = 5
// Can only be assigned from a CudaNdarray on the right side
// Or a ndarray when the left side part is c contiguous.
// Or a ndarray
// Or a python scalar with value 0 when the left side part is c contiguous.
static int
CudaNdarray_setitem(PyObject *o, PyObject *key, PyObject *value)
......@@ -1663,6 +1686,7 @@ CudaNdarray_setitem(PyObject *o, PyObject *key, PyObject *value)
if (verbose) fprintf(stderr, "CudaNdarray_setitem start\n");
// We try to copy directly into this CudaNdarray from the ndarray
CudaNdarray* rval = (CudaNdarray*)CudaNdarray_Subscript(o, key);
CudaNdarray* new_value = NULL;
if(!rval){
// CudaNdarray_Subscript failed and set the error msg.
......@@ -1683,7 +1707,10 @@ CudaNdarray_setitem(PyObject *o, PyObject *key, PyObject *value)
// This case shouldn't happen, based on what I see in Subscript
// but just in case it happens sometime in the future
PyErr_Format(PyExc_RuntimeError, "__getitem__ must return a CudaNdarray that refers to the original CudaNdarray, not a copy. rval.base=%p o.base=%p o=%p",
PyErr_Format(PyExc_RuntimeError,
"__getitem__ must return a CudaNdarray that refers to"
" the original CudaNdarray, not a copy. rval.base=%p"
" o.base=%p o=%p",
(((CudaNdarray*)rval)->base), ((CudaNdarray*)o)->base, o);
Py_DECREF(rval);
return -1;
......@@ -1691,55 +1718,32 @@ CudaNdarray_setitem(PyObject *o, PyObject *key, PyObject *value)
PyObject * intobj = NULL;
if(CudaNdarray_Check(o) && PyArray_Check(value)){
if (verbose) fprintf(stderr, "CudaNdarray_setitem dest is a CudaNdarray and value is a ndarray\n");
int typenum = PyArray_TYPE(value);
if (typenum != REAL_TYPENUM){
PyErr_SetString(PyExc_TypeError, "CudaNdarray.__setitem__: can only copy from float32 arrays");
Py_XDECREF(rval);
return -1;
}
if(! CudaNdarray_is_c_contiguous(rval)){
PyErr_SetString(PyExc_NotImplementedError, "CudaNdarray.__setitem__: When the new value is an ndarray the part where we copy it to must be c contiguous.");
Py_XDECREF(rval);
return -1;
}
if(rval->nd != ((PyArrayObject*)value)->nd){
PyErr_Format(PyExc_NotImplementedError, "CudaNdarray.__setitem__: need same number of dims. destination nd=%d, source nd=%d. broadcasting implemented only for zeroing values from python scalar.",
rval->nd,((PyArrayObject*)value)->nd);
Py_XDECREF(rval);
if (verbose)
fprintf(stderr,
"CudaNdarray_setitem dest is a CudaNdarray and"
" value is a ndarray\n");
new_value = (CudaNdarray*) CudaNdarray_New();
if(!new_value)
{
return -1;
}
for(int i=0 ; i<rval->nd ; i++){
if(CudaNdarray_HOST_DIMS(rval)[i] != ((PyArrayObject*)value)->dimensions[i]){
PyErr_Format(PyExc_ValueError, "CudaNdarray.__setitem__: need same dimensions for dim %d, destination=%d, source=%ld",
i,
CudaNdarray_HOST_DIMS(rval)[i],
(long int)(((PyArrayObject*)value)->dimensions[i]));
if(CudaNdarray_CopyFromArray(new_value, (PyArrayObject *) value))
{
Py_XDECREF(new_value);
Py_XDECREF(rval);
return -1;
}
}
PyArrayObject * py_v = (PyArrayObject*)PyArray_ContiguousFromAny((PyObject*)value, typenum,
rval->nd, rval->nd);
cublasSetVector(PyArray_SIZE(py_v),
sizeof(real),
PyArray_DATA(py_v), 1,
rval->devdata, 1);
CNDA_THREAD_SYNC;
Py_XDECREF(py_v);
Py_XDECREF(rval);
if (CUBLAS_STATUS_SUCCESS != cublasGetError()){
PyErr_SetString(PyExc_RuntimeError, "CudaNdarray.__setitem__: error copying ndarray data to device memory");
return -1;
}
return 0;
value = (PyObject *) new_value;
}
else if ((intobj=PyNumber_Int(value)))
{
if (verbose) fprintf(stderr, "CudaNdarray_setitem dest and value is a python number\n");
if (verbose)
fprintf(stderr,
"CudaNdarray_setitem dest and value is a python number\n");
if(! CudaNdarray_is_c_contiguous(rval)){
PyErr_SetString(PyExc_NotImplementedError,
"CudaNdarray.__setitem__: When the new value is a scalar of value 0 the part where we copy to must be c contiguous.");
"CudaNdarray.__setitem__: When the new value is a scalar"
" of value 0 the part where we copy to must be c contiguous.");
Py_XDECREF(rval);
return -1;
}
......@@ -1748,7 +1752,8 @@ CudaNdarray_setitem(PyObject *o, PyObject *key, PyObject *value)
Py_DECREF(intobj); intobj=NULL;
if (val == 0)
{
cudaError_t err = cudaMemset(rval->devdata, 0, CudaNdarray_SIZE(rval) * sizeof(real));
cudaError_t err = cudaMemset(rval->devdata, 0,
CudaNdarray_SIZE(rval) * sizeof(real));
Py_XDECREF(rval);
if (err)
{
......@@ -1760,7 +1765,8 @@ CudaNdarray_setitem(PyObject *o, PyObject *key, PyObject *value)
} else {
Py_XDECREF(rval);
PyErr_SetString(PyExc_NotImplementedError,
"CudaNdarray.__setitem__: we support setting only python scalar of value 0, numpy nd array and CudaNdarray.");
"CudaNdarray.__setitem__: we support setting only python"
" scalar of value 0, numpy nd array and CudaNdarray.");
return -1;
}
}
......@@ -1769,16 +1775,25 @@ CudaNdarray_setitem(PyObject *o, PyObject *key, PyObject *value)
if(!CudaNdarray_Check(o) || !CudaNdarray_Check(value))
{
PyErr_SetString(PyExc_TypeError, "CudaNdarray.__setitem__: left must be a CudaNdarrays and right must be a CudaNdarrays, an ndarray or a python scalar of value 0.");
PyErr_SetString(PyExc_TypeError,
"CudaNdarray.__setitem__: left must be a CudaNdarrays and right"
" must be a CudaNdarrays, an ndarray or a python scalar of value 0.");
Py_XDECREF(new_value);
return -1;
}
if (verbose) fprintf(stderr, "CudaNdarray_setitem dest and value are CudaNdarray\n");
if (verbose)
fprintf(stderr, "CudaNdarray_setitem dest and value are CudaNdarray\n");
if (cnda_copy_structure_to_device(rval))
{
PyErr_SetString(PyExc_RuntimeError, "CudaNdarray.__setitem__: syncing structure to device failed");
PyErr_SetString(PyExc_RuntimeError,
"CudaNdarray.__setitem__: syncing structure to device failed");
Py_DECREF(rval);
if (verbose) fprintf(stderr, "CudaNdarray_setitem error end\n");
Py_XDECREF(new_value);
if (verbose)
fprintf(stderr, "CudaNdarray_setitem error end\n");
return -1;
}
......@@ -1787,7 +1802,10 @@ CudaNdarray_setitem(PyObject *o, PyObject *key, PyObject *value)
if(CudaNdarray_CopyFromCudaNdarray(rval, (CudaNdarray*)value, true))
{
Py_DECREF((PyObject*)rval);
if (verbose) fprintf(stderr, "CudaNdarray_setitem error end\n");
Py_XDECREF(new_value);
if (verbose)
fprintf(stderr, "CudaNdarray_setitem error end\n");
return -1;
}
......@@ -1796,6 +1814,7 @@ CudaNdarray_setitem(PyObject *o, PyObject *key, PyObject *value)
// Clean up locally-created references
Py_DECREF(rval);
Py_XDECREF(new_value);
return 0;
}
......@@ -2759,7 +2778,9 @@ static __global__ void k_copy_1d(const int N, const float * x, const int sx, flo
}
//copy from other into self
int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, const CudaNdarray * other, bool unbroadcast)
int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self,
const CudaNdarray * other,
bool unbroadcast)
{
int verbose = 0;
if (verbose>1) fprintf(stderr, "CudaNdarray_CopyFromCudaNdarray\n");
......@@ -2771,15 +2792,29 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, const CudaNdarray * othe
"can't copy into un-initialized CudaNdarray");
return -1;
}
if (self->nd != other->nd)
CudaNdarray * new_other = NULL;
if (self->nd < other->nd)
{
PyErr_Format(PyExc_NotImplementedError,
"CudaNdarray_CopyFromCudaNdarray: need same number of"
" dims. destination nd=%d, source nd=%d."
" No broadcasting implemented.",
"CudaNdarray_CopyFromCudaNdarray: The destination need more or the"
" same number of dimensions then the source. Got %d and %d.",
self->nd, other->nd);
return -1;
}
else if (self->nd != other->nd)
{
new_other = (CudaNdarray *) CudaNdarray_View(other);
int added_dims = self->nd - other->nd;
int pattern[self->nd];
for(int i = 0; i < added_dims; i++)
pattern[i] = -1;
for(int i = 0; i < other->nd; i++)
pattern[i + added_dims] = i;
CudaNdarray_dimshuffle(new_other, self->nd, pattern);
other = new_other;
}
assert(self->nd == other->nd);
//standard elemwise dim checks (also compute total size)
unsigned int size = 1;
unsigned int size_source = 1;
......@@ -2793,13 +2828,15 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, const CudaNdarray * othe
" destination=%d, source=%d",
i, CudaNdarray_HOST_DIMS(self)[i],
CudaNdarray_HOST_DIMS(other)[i]);
return -1;
Py_XDECREF(new_other);
return -1;
}
size *= (unsigned int) CudaNdarray_HOST_DIMS(self)[i];
size_source *= (unsigned int) CudaNdarray_HOST_DIMS(other)[i];
}
if (0 == size)
{
Py_XDECREF(new_other);
return 0; //nothing to copy, we're done.
}
if (CudaNdarray_is_c_contiguous(self) &&
......@@ -2812,6 +2849,7 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, const CudaNdarray * othe
cublasScopy(size, CudaNdarray_DEV_DATA(other), 1,
CudaNdarray_DEV_DATA(self), 1);
CNDA_THREAD_SYNC;
Py_XDECREF(new_other);
if (CUBLAS_STATUS_SUCCESS != cublasGetError())
{
PyErr_SetString(PyExc_RuntimeError, "Error copying memory");
......@@ -2849,6 +2887,7 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, const CudaNdarray * othe
"Cuda error: %s: %s. (n_blocks=%i,"
" n_threads_per_block=%i)\n", "k_copy_1d",
cudaGetErrorString(err), n_blocks, n_threads);
Py_XDECREF(new_other);
return -1;
}
}; break;
......@@ -2893,10 +2932,12 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, const CudaNdarray * othe
"k_elemwise_unary_rowmajor_copy",
cudaGetErrorString(err), n_blocks,
threads_per_block);
Py_XDECREF(new_other);
return -1;
}
}
};
Py_XDECREF(new_other);
return 0;
}
......@@ -4088,7 +4129,7 @@ int CudaNdarray_set_nd(CudaNdarray * self, const int nd)
return 0;
}
int CudaNdarray_set_device_data(CudaNdarray * self, float * data, CudaNdarray * base)
int CudaNdarray_set_device_data(CudaNdarray * self, float * data, const CudaNdarray * base)
{
return CudaNdarray_set_device_data(self, data, (PyObject *) base);
}
......
......@@ -282,7 +282,7 @@ static PyObject *CudaNdarray_NewDims(int nd, const inttype * dims)
* Set self to be a view of given `data`, owned by existing CudaNdarray `base`.
*/
DllExport int CudaNdarray_set_device_data(CudaNdarray * self, float * data, PyObject * base);
DllExport int CudaNdarray_set_device_data(CudaNdarray * self, float * data, CudaNdarray * base);
DllExport int CudaNdarray_set_device_data(CudaNdarray * self, float * data, const CudaNdarray * base);
/**
* Return an independent copy of self
......
......@@ -765,8 +765,6 @@ def local_gpu_advanced_incsubtensor1(node):
'either set the `warn.gpu_set_subtensor1` config '
'option to False, or `warn.ignore_bug_before` to at '
'least \'0.6\'.', stacklevel=1)
if set_instead_of_inc:
return
gpu_op = GpuAdvancedIncSubtensor1(
set_instead_of_inc=set_instead_of_inc)
......@@ -799,8 +797,7 @@ def local_gpu_advanced_incsubtensor1(node):
'either set the `warn.gpu_set_subtensor1` config '
'option to False, or `warn.ignore_bug_before` to at '
'least \'0.6\'.', stacklevel=1)
if set_instead_of_inc:
return
gpu_op = GpuAdvancedIncSubtensor1(
set_instead_of_inc=set_instead_of_inc)
return [host_from_gpu(gpu_op(gpu_x, gpu_y, *coords))]
......
......@@ -630,13 +630,9 @@ def test_setitem_matrixvector1():
assert numpy.allclose(a,numpy.asarray(_a))
#test direct transfert from numpy
try:
_a[:,1] = b*100
a[:,1] = b*100
raise Exception("CudaNdarray.__setitem__ should have returned an error")
assert numpy.allclose(a,numpy.asarray(_a))
except NotImplementedError, e:
pass
_a[:,1] = b*100
a[:,1] = b*100
assert numpy.allclose(a,numpy.asarray(_a))
row = theano._asarray([777,888,999], dtype='float32')
_a[1,:] = row
......@@ -659,13 +655,9 @@ def test_setitem_matrix_tensor3():
assert numpy.allclose(a,numpy.asarray(_a))
#test direct transfert from numpy
try:
_a[:,1,1] = b*100
a[:,1,1] = b*100
raise Exception("CudaNdarray.__setitem__ should have returned an error")
assert numpy.allclose(a,numpy.asarray(_a))
except NotImplementedError:
pass
_a[:,1,1] = b*100
a[:,1,1] = b*100
assert numpy.allclose(a,numpy.asarray(_a))
row = theano._asarray([777,888,999], dtype='float32')
_a[1,1,:] = row
......@@ -714,7 +706,7 @@ def test_setitem_matrix_bad_ndim():
# attempt to assign the ndarray b with setitem
_a[:,:,1] = _b
assert False
except NotImplementedError, e:
except ValueError, e:
#print e
assert True
......@@ -723,7 +715,7 @@ def test_setitem_matrix_bad_ndim():
# attempt to assign the ndarray b with setitem
_a[1,:,:] = b
assert False
except NotImplementedError, e:
except ValueError, e:
#print e
assert True
......@@ -806,7 +798,7 @@ def test_setitem_broadcast():
a[:,:,1] = b.reshape((1,3))
assert numpy.allclose(numpy.asarray(_a),a)
#This is not supported for now.
def test_setitem_broadcast_numpy():
#test scalar to vector without stride
a = numpy.arange(3)
......@@ -814,73 +806,81 @@ def test_setitem_broadcast_numpy():
_a = cuda_ndarray.CudaNdarray(a)
b = theano._asarray(9, dtype='float32')
try:
_a[:] = b.reshape((1,))
a[:] = b.reshape((1,))
assert False
assert numpy.allclose(numpy.asarray(_a),a)
except ValueError:
pass
_a[:] = b.reshape((1,))
a[:] = b.reshape((1,))
assert numpy.allclose(numpy.asarray(_a), a)
#test vector to matrice without stride
a = numpy.arange(9)
a.resize((3,3))
a.resize((3, 3))
a = theano._asarray(a, dtype='float32')
_a = cuda_ndarray.CudaNdarray(a)
try:
b = theano._asarray([7,8,9], dtype='float32')
_a[:,:] = b.reshape((1,3))
a[:,:] = b.reshape((1,3))
assert False
assert numpy.allclose(numpy.asarray(_a),a)
except ValueError:
pass
b = theano._asarray([7, 8, 9], dtype='float32')
_a[:, :] = b.reshape((1, 3))
a[:, :] = b.reshape((1, 3))
assert numpy.allclose(numpy.asarray(_a), a)
#test vector to matrice with stride
a = numpy.arange(27)
a.resize((3,3,3))
a.resize((3, 3, 3))
a = theano._asarray(a, dtype='float32')
_a = cuda_ndarray.CudaNdarray(a)
try:
b = theano._asarray([[7,8,9],[10,11,12]], dtype='float32')
b = b[0]
_a[1,:,:] = b.reshape((1,3))
a[1,:,:] = b.reshape((1,3))
assert False
assert numpy.allclose(numpy.asarray(_a),a)
except ValueError:
pass
b = theano._asarray([[7, 8, 9], [10, 11, 12]], dtype='float32')
b = b[0]
_a[1, :, :] = b.reshape((1, 3))
a[1, :, :] = b.reshape((1, 3))
assert numpy.allclose(numpy.asarray(_a), a)
# this also fails for the moment
def test_setitem_rightvalue_ndarray_fails():
"""
Now we don't automatically add dimensions to broadcast
"""
a = numpy.arange(27)
a.resize((3,3,3))
a = numpy.arange(3 * 4 * 5)
a.resize((3, 4, 5))
a = theano._asarray(a, dtype='float32')
_a = cuda_ndarray.CudaNdarray(a)
b = theano._asarray([7,8,9], dtype='float32')
b = theano._asarray([7, 8, 9, 10], dtype='float32')
_b = cuda_ndarray.CudaNdarray(b)
b5 = theano._asarray([7, 8, 9, 10, 11], dtype='float32')
_b5 = cuda_ndarray.CudaNdarray(b)
# attempt to assign the ndarray b with setitem
_a[:, :, 1] = _b
a[:, :, 1] = b
assert numpy.allclose(numpy.asarray(_a), a)
#test direct transfert from numpy to contiguous region
# attempt to assign the ndarray b with setitem
# same number of dim
mat = numpy.random.rand(4, 5).astype('float32')
_a[2, :, :] = mat
a[2, :, :] = mat
assert numpy.allclose(numpy.asarray(_a), a)
# without same number of dim
try:
# attempt to assign the ndarray b with setitem
_a[:,:,1] = _b
assert False
except NotImplementedError, e:
#print e
assert True
_a[0, :, :] = mat
#a[0, :, :] = mat
#assert numpy.allclose(numpy.asarray(_a), a)
except ValueError, e:
pass
#test direct transfert from numpy with broadcast
_a[0, :, :] = b5
a[0, :, :] = b5
assert numpy.allclose(numpy.asarray(_a), a)
#test direct transfert from numpy to not contiguous region
# attempt to assign the ndarray b with setitem
_a[:, :, 2] = b
a[:, :, 2] = b
assert numpy.allclose(numpy.asarray(_a), a)
#test direct transfert from numpy
try:
# attempt to assign the ndarray b with setitem
_a[:,:,1] = b
assert False
except NotImplementedError, e:
#print e
assert True
def test_zeros_basic():
for shp in [(3,4,5), (300,), (), (0,7)]:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论