Commit f2693c57 authored by Pascal Lamblin

Make dev_structure_fresh mutable, add a few const

This enables calling CudaNdarray_Copy on a const CudaNdarray*, which was previously impossible.
Parent commit: 22f11d45
...@@ -453,7 +453,7 @@ PyObject* CudaNdarray_Zeros(PyObject* dummy, PyObject* shape) ...@@ -453,7 +453,7 @@ PyObject* CudaNdarray_Zeros(PyObject* dummy, PyObject* shape)
PyObject * CudaNdarray_Copy(CudaNdarray * self) PyObject * CudaNdarray_Copy(const CudaNdarray * self)
{ {
PyObject * rval = CudaNdarray_New(); PyObject * rval = CudaNdarray_New();
if ((!rval) || (-1 == self->nd)) if ((!rval) || (-1 == self->nd))
...@@ -2777,7 +2777,7 @@ static __global__ void k_copy_1d(const int N, const float * x, const int sx, flo ...@@ -2777,7 +2777,7 @@ static __global__ void k_copy_1d(const int N, const float * x, const int sx, flo
} }
//copy from other into self //copy from other into self
int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other, bool unbroadcast) int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, const CudaNdarray * other, bool unbroadcast)
{ {
int verbose = 0; int verbose = 0;
if (verbose>1) fprintf(stderr, "CudaNdarray_CopyFromCudaNdarray\n"); if (verbose>1) fprintf(stderr, "CudaNdarray_CopyFromCudaNdarray\n");
...@@ -2856,7 +2856,7 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other, boo ...@@ -2856,7 +2856,7 @@ int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other, boo
// call worker routine // call worker routine
unsigned int n_blocks = std::min(size, (unsigned int)NUM_VECTOR_OP_BLOCKS); unsigned int n_blocks = std::min(size, (unsigned int)NUM_VECTOR_OP_BLOCKS);
unsigned int threads_per_block = std::min(ceil_intdiv(size, n_blocks), (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK); unsigned int threads_per_block = std::min(ceil_intdiv(size, n_blocks), (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
CudaNdarray * cuda_dims = other; const CudaNdarray * cuda_dims = other;
if(unbroadcast) if(unbroadcast)
cuda_dims = self; cuda_dims = self;
//copy from other into self //copy from other into self
...@@ -3099,7 +3099,7 @@ int CudaNdarray_sgemv(float alpha, const CudaNdarray * A, const CudaNdarray * B, ...@@ -3099,7 +3099,7 @@ int CudaNdarray_sgemv(float alpha, const CudaNdarray * A, const CudaNdarray * B,
return 0; return 0;
} }
int CudaNdarray_sger(float alpha, CudaNdarray * x, CudaNdarray * y, CudaNdarray * A) { int CudaNdarray_sger(float alpha, const CudaNdarray * x, const CudaNdarray * y, CudaNdarray * A) {
if (x->nd != 1) { PyErr_SetString(PyExc_ValueError, "non-vector arg x to sger"); return -1; } if (x->nd != 1) { PyErr_SetString(PyExc_ValueError, "non-vector arg x to sger"); return -1; }
if (y->nd != 1) { PyErr_SetString(PyExc_ValueError, "non-vector arg y to sger"); return -1; } if (y->nd != 1) { PyErr_SetString(PyExc_ValueError, "non-vector arg y to sger"); return -1; }
if (A->nd != 2) { PyErr_SetString(PyExc_ValueError, "non-matrix arg A to sger"); return -1; } if (A->nd != 2) { PyErr_SetString(PyExc_ValueError, "non-matrix arg A to sger"); return -1; }
...@@ -3122,7 +3122,7 @@ int CudaNdarray_sger(float alpha, CudaNdarray * x, CudaNdarray * y, CudaNdarray ...@@ -3122,7 +3122,7 @@ int CudaNdarray_sger(float alpha, CudaNdarray * x, CudaNdarray * y, CudaNdarray
} }
// Since Sger expects A in col-major, we invert x and y to fake this. // Since Sger expects A in col-major, we invert x and y to fake this.
int x_strides = CudaNdarray_HOST_STRIDES(x)[0]; int x_strides = CudaNdarray_HOST_STRIDES(x)[0];
CudaNdarray * x_ = x; const CudaNdarray * x_ = x;
if(x_strides == 0){ if(x_strides == 0){
if(CudaNdarray_HOST_DIMS(x)[0] != 1){ if(CudaNdarray_HOST_DIMS(x)[0] != 1){
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -3138,7 +3138,7 @@ int CudaNdarray_sger(float alpha, CudaNdarray * x, CudaNdarray * y, CudaNdarray ...@@ -3138,7 +3138,7 @@ int CudaNdarray_sger(float alpha, CudaNdarray * x, CudaNdarray * y, CudaNdarray
} }
int y_strides = CudaNdarray_HOST_STRIDES(y)[0]; int y_strides = CudaNdarray_HOST_STRIDES(y)[0];
CudaNdarray * y_ = y; const CudaNdarray * y_ = y;
if(y_strides == 0){ if(y_strides == 0){
if(CudaNdarray_HOST_DIMS(y)[0] != 1){ if(CudaNdarray_HOST_DIMS(y)[0] != 1){
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -3816,7 +3816,7 @@ CudaNdarray_set_stride(CudaNdarray * self, int idx, int s) ...@@ -3816,7 +3816,7 @@ CudaNdarray_set_stride(CudaNdarray * self, int idx, int s)
int int
cnda_copy_structure_to_device(CudaNdarray * self) cnda_copy_structure_to_device(const CudaNdarray * self)
{ {
cublasSetVector(cnda_structure_size(self->nd), sizeof(int), self->host_structure, 1, self->dev_structure, 1); cublasSetVector(cnda_structure_size(self->nd), sizeof(int), self->host_structure, 1, self->dev_structure, 1);
CNDA_THREAD_SYNC; CNDA_THREAD_SYNC;
...@@ -3830,7 +3830,7 @@ cnda_copy_structure_to_device(CudaNdarray * self) ...@@ -3830,7 +3830,7 @@ cnda_copy_structure_to_device(CudaNdarray * self)
} }
const int * const int *
CudaNdarray_DEV_DIMS(CudaNdarray * self) CudaNdarray_DEV_DIMS(const CudaNdarray * self)
{ {
if (!self->dev_structure_fresh) if (!self->dev_structure_fresh)
{ {
...@@ -3840,7 +3840,7 @@ CudaNdarray_DEV_DIMS(CudaNdarray * self) ...@@ -3840,7 +3840,7 @@ CudaNdarray_DEV_DIMS(CudaNdarray * self)
return self->dev_structure; return self->dev_structure;
} }
const int * const int *
CudaNdarray_DEV_STRIDES(CudaNdarray * self) CudaNdarray_DEV_STRIDES(const CudaNdarray * self)
{ {
if (!self->dev_structure_fresh) if (!self->dev_structure_fresh)
{ {
...@@ -3850,7 +3850,7 @@ CudaNdarray_DEV_STRIDES(CudaNdarray * self) ...@@ -3850,7 +3850,7 @@ CudaNdarray_DEV_STRIDES(CudaNdarray * self)
return self->dev_structure + self->nd; return self->dev_structure + self->nd;
} }
const int * const int *
CudaNdarray_DEV_LOG2DIMS(CudaNdarray * self) CudaNdarray_DEV_LOG2DIMS(const CudaNdarray * self)
{ {
if (!self->dev_structure_fresh) if (!self->dev_structure_fresh)
{ {
......
...@@ -81,7 +81,7 @@ struct CudaNdarray ...@@ -81,7 +81,7 @@ struct CudaNdarray
//device pointers (allocated by cudaMalloc) //device pointers (allocated by cudaMalloc)
int dev_structure_fresh; mutable int dev_structure_fresh;
//dev_structure should be accessed via macros, otherwise may not be synchronized //dev_structure should be accessed via macros, otherwise may not be synchronized
int * dev_structure; //dim0, dim1, ..., stride0, stride1, ... int * dev_structure; //dim0, dim1, ..., stride0, stride1, ...
real* devdata; //pointer to data element [0,..,0]. real* devdata; //pointer to data element [0,..,0].
...@@ -154,11 +154,11 @@ CudaNdarray_set_stride(CudaNdarray * self, int idx, int s); ...@@ -154,11 +154,11 @@ CudaNdarray_set_stride(CudaNdarray * self, int idx, int s);
* *
* This means: recalculate the log2dims and transfer structure to the card * This means: recalculate the log2dims and transfer structure to the card
*/ */
DllExport int cnda_copy_structure_to_device(CudaNdarray * self); DllExport int cnda_copy_structure_to_device(const CudaNdarray * self);
DllExport const int *CudaNdarray_DEV_DIMS(CudaNdarray * self); DllExport const int *CudaNdarray_DEV_DIMS(const CudaNdarray * self);
DllExport const int *CudaNdarray_DEV_STRIDES(CudaNdarray * self); DllExport const int *CudaNdarray_DEV_STRIDES(const CudaNdarray * self);
DllExport const int *CudaNdarray_DEV_LOG2DIMS(CudaNdarray * self); DllExport const int *CudaNdarray_DEV_LOG2DIMS(const CudaNdarray * self);
DllExport float *CudaNdarray_DEV_DATA(const CudaNdarray * self); DllExport float *CudaNdarray_DEV_DATA(const CudaNdarray * self);
/** /**
...@@ -283,7 +283,7 @@ DllExport PyObject * CudaNdarray_DeepCopy(CudaNdarray * self, PyObject * memo); ...@@ -283,7 +283,7 @@ DllExport PyObject * CudaNdarray_DeepCopy(CudaNdarray * self, PyObject * memo);
/** /**
* Return an independent copy of self * Return an independent copy of self
*/ */
DllExport PyObject * CudaNdarray_Copy(CudaNdarray * self); DllExport PyObject * CudaNdarray_Copy(const CudaNdarray * self);
/** /**
* Return a new object obtained by summing over the dimensions for which there is a 1 in the mask. * Return a new object obtained by summing over the dimensions for which there is a 1 in the mask.
...@@ -302,7 +302,7 @@ DllExport int CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj); ...@@ -302,7 +302,7 @@ DllExport int CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj);
* *
* self is reallocated to have the correct dimensions if necessary. * self is reallocated to have the correct dimensions if necessary.
*/ */
DllExport int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, CudaNdarray * other, bool unbroadcast = false); DllExport int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, const CudaNdarray * other, bool unbroadcast = false);
/** /**
* Transfer the contents of CudaNdarray `self` to a new numpy ndarray. * Transfer the contents of CudaNdarray `self` to a new numpy ndarray.
...@@ -321,7 +321,7 @@ DllExport PyObject * CudaNdarray_IS_C_Contiguous(CudaNdarray * self); ...@@ -321,7 +321,7 @@ DllExport PyObject * CudaNdarray_IS_C_Contiguous(CudaNdarray * self);
DllExport int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B, float beta, CudaNdarray * C); DllExport int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B, float beta, CudaNdarray * C);
DllExport int CudaNdarray_sgemv(float alpha, const CudaNdarray * A, const CudaNdarray * B, float beta, CudaNdarray * C); DllExport int CudaNdarray_sgemv(float alpha, const CudaNdarray * A, const CudaNdarray * B, float beta, CudaNdarray * C);
DllExport int CudaNdarray_sger(float alpha, CudaNdarray * x, CudaNdarray * y, CudaNdarray* A); DllExport int CudaNdarray_sger(float alpha, const CudaNdarray * x, const CudaNdarray * y, CudaNdarray* A);
DllExport int CudaNdarray_reduce_sum(CudaNdarray * self, CudaNdarray * A); DllExport int CudaNdarray_reduce_sum(CudaNdarray * self, CudaNdarray * A);
DllExport int CudaNdarray_reduce_prod(CudaNdarray * self, CudaNdarray * A); DllExport int CudaNdarray_reduce_prod(CudaNdarray * self, CudaNdarray * A);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论