提交 2d9aff75 作者: Julien Demouth 提交者: Frederic

Update the version of CNMeM

This new version of CNMeM corresponds to the following commit: https://github.com/NVIDIA/cnmem/commit/2559e911ca5ad33c8a8aa7e5877345265115d963 It contains two bug fixes: 1/ A critical fix for the case where the first device used by the library is not device 0, which could result in a call to cudaSetDevice(-1). 2/ A minor fix for calls to cnmemMalloc with a non-NULL pointer but a size of 0: such a call used to return a CNMEM_STATUS_BAD_PARAM error and now returns success. I also changed the code in cuda_ndarray.cu to remove the extra check that was performed when the size of the allocation was 0.
上级 130d2ce9
...@@ -57,7 +57,6 @@ typedef enum ...@@ -57,7 +57,6 @@ typedef enum
CNMEM_STATUS_SUCCESS = 0, CNMEM_STATUS_SUCCESS = 0,
CNMEM_STATUS_CUDA_ERROR, CNMEM_STATUS_CUDA_ERROR,
CNMEM_STATUS_INVALID_ARGUMENT, CNMEM_STATUS_INVALID_ARGUMENT,
CNMEM_STATUS_MEMORY_LEAK,
CNMEM_STATUS_NOT_INITIALIZED, CNMEM_STATUS_NOT_INITIALIZED,
CNMEM_STATUS_OUT_OF_MEMORY, CNMEM_STATUS_OUT_OF_MEMORY,
CNMEM_STATUS_UNKNOWN_ERROR CNMEM_STATUS_UNKNOWN_ERROR
...@@ -109,29 +108,58 @@ typedef struct cnmemDevice_t_ ...@@ -109,29 +108,58 @@ typedef struct cnmemDevice_t_
cnmemStatus_t CNMEM_API cnmemInit(int numDevices, const cnmemDevice_t *devices, unsigned flags); cnmemStatus_t CNMEM_API cnmemInit(int numDevices, const cnmemDevice_t *devices, unsigned flags);
/** /**
* \brief Add a new stream to the pool of managed streams on a device. * \brief Release all the allocated memory.
* *
* This function registers a new stream into a device memory manager. It is thread-safe. * This function must be called by a single thread and after all threads that called
* cnmemMalloc/cnmemFree have joined. This function is not thread-safe.
* *
* \return * \return
* CNMEM_STATUS_SUCCESS, if everything goes fine, * CNMEM_STATUS_SUCCESS, if everything goes fine,
* CNMEM_STATUS_INVALID_ARGUMENT, if one of the argument is invalid, * CNMEM_STATUS_NOT_INITIALIZED, if the ::cnmemInit function has not been called,
* CNMEM_STATUS_CUDA_ERROR, if an error happens in one of the CUDA functions.
*/ */
cnmemStatus_t CNMEM_API cnmemRegisterStream(cudaStream_t stream); cnmemStatus_t CNMEM_API cnmemFinalize();
/** /**
* \brief Release all the allocated memory. * \brief Increase the internal reference counter of the context object.
* *
* This function must be called by a single thread and after all threads that called * This function increases the internal reference counter of the library. The purpose of that
* cnmemMalloc/cnmemFree have joined. This function is not thread-safe. * reference counting mechanism is to give more control to the user over the lifetime of the
* library. It is useful with scoped memory allocation which may be destroyed in a final
* memory collection after the end of main(). That function is thread-safe.
* *
* \return * \return
* CNMEM_STATUS_SUCCESS, if everything goes fine, * CNMEM_STATUS_SUCCESS, if everything goes fine,
* CNMEM_STATUS_NOT_INITIALIZED, if the ::cnmemInit function has not been called, * CNMEM_STATUS_NOT_INITIALIZED, if the ::cnmemInit function has not been called,
* CNMEM_STATUS_MEMORY_LEAK, if there are unreleased blocks in the memory queues,
* CNMEM_STATUS_CUDA_ERROR, if an error happens in one of the CUDA functions.
*/ */
cnmemStatus_t CNMEM_API cnmemFinalize(); cnmemStatus_t CNMEM_API cnmemRetain();
/**
* \brief Decrease the internal reference counter of the context object.
*
* This function decreases the internal reference counter of the library. The purpose of that
* reference counting mechanism is to give more control to the user over the lifetime of the
* library. It is useful with scoped memory allocation which may be destroyed in a final
* memory collection after the end of main(). That function is thread-safe.
*
* You can use \c cnmemRelease to explicitly finalize the library.
*
* \return
* CNMEM_STATUS_SUCCESS, if everything goes fine,
* CNMEM_STATUS_NOT_INITIALIZED, if the ::cnmemInit function has not been called,
*/
cnmemStatus_t CNMEM_API cnmemRelease();
/**
* \brief Add a new stream to the pool of managed streams on a device.
*
* This function registers a new stream into a device memory manager. It is thread-safe.
*
* \return
* CNMEM_STATUS_SUCCESS, if everything goes fine,
* CNMEM_STATUS_INVALID_ARGUMENT, if one of the argument is invalid,
*/
cnmemStatus_t CNMEM_API cnmemRegisterStream(cudaStream_t stream);
/** /**
* \brief Allocate memory. * \brief Allocate memory.
......
...@@ -138,9 +138,7 @@ void * device_malloc(size_t size, int verbose) ...@@ -138,9 +138,7 @@ void * device_malloc(size_t size, int verbose)
///@TODO: thejaswi: support for multiple-streams? ///@TODO: thejaswi: support for multiple-streams?
if(g_use_cnmem) { if(g_use_cnmem) {
cnmemStatus_t status = CNMEM_STATUS_SUCCESS; cnmemStatus_t status = CNMEM_STATUS_SUCCESS;
if( size != 0 ) {
status = cnmemMalloc(&rval, size, NULL); status = cnmemMalloc(&rval, size, NULL);
}
if(status != CNMEM_STATUS_SUCCESS) { if(status != CNMEM_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, PyErr_Format(PyExc_MemoryError,
"Error allocating %zd bytes of device memory (%s).", "Error allocating %zd bytes of device memory (%s).",
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论