Commit 068c416c authored by Ian Goodfellow

doc

Parent 7b47fbde
...
@@ -1204,6 +1204,7 @@ CudaNdarray_exp(CudaNdarray* self)
     return (PyObject*)rval;
 }
 static PyMethodDef CudaNdarray_methods[] =
 {
     {"__array__",
...
@@ -75,15 +75,16 @@ struct CudaNdarray
     /* Type-specific fields go here. */
     //GpuTensorType::VoidTensor * vt;
     int nd; //the number of dimensions of the tensor
-    // Client should acces host_structure via CudaNdarray_HOST_DIMS / CudaNdarray_HOST_STRIDES macros
+    // Client should access host_structure via the CudaNdarray_HOST_DIMS / CudaNdarray_HOST_STRIDES functions
     int * host_structure; //dim0, dim1, ... stride0, stride1, ...
     int data_allocated; //the number of bytes allocated for devdata
     //device pointers (allocated by cudaMalloc)
     mutable int dev_structure_fresh;
-    //dev_structure should be accessed via macros, otherwise may not be
-    //synchronized. The macro will allocate it when needed.
+    //dev_structure should be accessed via functions like
+    //CudaNdarray_DEV_DIMS; otherwise it may not be
+    //synchronized with host_structure. The accessor functions will allocate it when needed.
     mutable int * dev_structure; //dim0, dim1, ..., stride0, stride1, ...
     real* devdata; //pointer to data element [0,..,0].
 };
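A minimal host-side sketch of the accessor pattern these comments describe; it assumes the declarations from this header are in scope, and the function name and printf output are illustrative only:

#include <cstdio>

// Illustrative sketch: read the shape and strides of a CudaNdarray on the
// host through the accessor functions rather than indexing host_structure
// directly, so the host and device copies of the structure stay in sync.
void print_shape_and_strides(const CudaNdarray * x)
{
    const int * dims    = CudaNdarray_HOST_DIMS(x);
    const int * strides = CudaNdarray_HOST_STRIDES(x);
    for (int i = 0; i < x->nd; ++i)
    {
        // dims[i] is the length of dimension i; strides[i] is the step
        // taken to move one element along that dimension.
        printf("dim %d: size=%d stride=%d\n", i, dims[i], strides[i]);
    }
}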
...
@@ -118,6 +119,12 @@ CudaNdarray_is_c_contiguous(const CudaNdarray * self);
  */
 DllExport int cnda_structure_size(int nd);
+/*
+ * This describes the shape of the ndarray. The array
+ * of dimensions is itself stored on the host.
+ * If you need to access the dimensions array from inside
+ * a kernel, use CudaNdarray_DEV_DIMS.
+ */
 DllExport const int *
 CudaNdarray_HOST_DIMS(const CudaNdarray * self);
...
@@ -188,6 +195,10 @@ CudaNdarray_set_stride(CudaNdarray * self, int idx, int s)
  */
 DllExport int cnda_copy_structure_to_device(const CudaNdarray * self);
+/* CudaNdarray_DEV_DIMS gives the same information as CudaNdarray_HOST_DIMS,
+ * but stored on the GPU. Use this pointer when it needs to be accessed
+ * from inside a CUDA kernel.
+ */
 DllExport const int *CudaNdarray_DEV_DIMS(const CudaNdarray * self);
 DllExport const int *CudaNdarray_DEV_STRIDES(const CudaNdarray * self);
 DllExport const int *CudaNdarray_DEV_LOG2DIMS(const CudaNdarray * self);
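The pointer returned by CudaNdarray_DEV_DIMS lives in GPU memory, so it is fetched on the host but only dereferenced inside a kernel. A hedged sketch of that pattern; the kernel, its launch configuration, and the output buffer are illustrative, not declarations from this header:

// Illustrative kernel: compute the total element count from the
// device-resident dims array that the accessor functions keep in sync.
__global__ void n_elements_kernel(const int * dev_dims, int nd, int * out)
{
    if (blockIdx.x == 0 && threadIdx.x == 0)
    {
        int n = 1;
        for (int i = 0; i < nd; ++i)
            n *= dev_dims[i];
        *out = n;   // out must point to device memory
    }
}

// Host side: obtain the device pointer here, read it only on the device.
//   const int * dev_dims = CudaNdarray_DEV_DIMS(x);
//   n_elements_kernel<<<1, 1>>>(dev_dims, x->nd, d_out);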
...
@@ -389,8 +400,21 @@ DllExport int CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj);
  * Transfer the contents of CudaNdarray `other` to `self`.
  *
  * self is reallocated to have the correct dimensions if necessary.
+ * TODO: WRITEME: what does "if necessary" mean?
+ * TODO: we use this to implement set/inc subtensor, where self is a view of
+ *       the original tensor so that we write only to the subtensor. How
+ *       do we ensure that self is not reallocated in this case?
+ *
+ * unbroadcast: if true, this means that other is broadcastable in some
+ *              dimensions, and the result, self, is not.
+ *              i.e., if unbroadcast=true, we must do the broadcasting
+ *              operation as part of the copy.
+ *              e.g. suppose self and other are 2D matrices and other
+ *                   has only one row. Then we need to copy this row several
+ *                   times when copying to self.
  */
-DllExport int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, const CudaNdarray * other, bool unbroadcast = false);
+DllExport int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self,
+                                              const CudaNdarray * other, bool unbroadcast = false);
 /**
  * Transfer the contents of CudaNdarray `self` to a new numpy ndarray.
...
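To make the unbroadcast flag concrete, a hedged usage sketch following the one-row example in the comment above; the shapes, the allocation of `self` and `other`, and the error-handling convention are assumptions, not something this diff shows:

// Illustrative sketch: `other` holds one row of shape (1, 4) and `self`
// has shape (3, 4).  Passing unbroadcast=true asks the copy to replicate
// that row into every row of `self`; with the default unbroadcast=false
// the size-1 first dimension of `other` would not be expanded.
if (CudaNdarray_CopyFromCudaNdarray(self, other, true) != 0)
{
    // non-zero return assumed to indicate failure
    return -1;
}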