提交 2d9aff75 authored 作者: Julien Demouth's avatar Julien Demouth 提交者: Frederic

Update the version of CNMeM

This new version of CNMeM corresponds to the following commit: https://github.com/NVIDIA/cnmem/commit/2559e911ca5ad33c8a8aa7e5877345265115d963. It contains two bug fixes: 1/ One critical fix for the case where the first device used by the library is not device 0. It could result in a call to cudaSetDevice(-1). 2/ One minor fix for a call to cnmemMalloc with a non-NULL pointer but a size of 0. It returned a CNMEM_STATUS_BAD_PARAM in the past. It is now fixed and it returns success. I also changed the code in cuda_ndarray.cu to remove the extra check when the size of the allocation was 0.
上级 130d2ce9
...@@ -50,7 +50,6 @@ extern "C" const char* cnmemGetErrorString(cnmemStatus_t status) { ...@@ -50,7 +50,6 @@ extern "C" const char* cnmemGetErrorString(cnmemStatus_t status) {
case CNMEM_STATUS_SUCCESS: return "CNMEM_STATUS_SUCCESS"; case CNMEM_STATUS_SUCCESS: return "CNMEM_STATUS_SUCCESS";
case CNMEM_STATUS_CUDA_ERROR: return "CNMEM_STATUS_CUDA_ERROR"; case CNMEM_STATUS_CUDA_ERROR: return "CNMEM_STATUS_CUDA_ERROR";
case CNMEM_STATUS_INVALID_ARGUMENT: return "CNMEM_STATUS_INVALID_ARGUMENT"; case CNMEM_STATUS_INVALID_ARGUMENT: return "CNMEM_STATUS_INVALID_ARGUMENT";
case CNMEM_STATUS_MEMORY_LEAK: return "CNMEM_STATUS_MEMORY_LEAK";
case CNMEM_STATUS_NOT_INITIALIZED: return "CNMEM_STATUS_NOT_INITIALIZED"; case CNMEM_STATUS_NOT_INITIALIZED: return "CNMEM_STATUS_NOT_INITIALIZED";
case CNMEM_STATUS_OUT_OF_MEMORY: return "CNMEM_STATUS_OUT_OF_MEMORY"; case CNMEM_STATUS_OUT_OF_MEMORY: return "CNMEM_STATUS_OUT_OF_MEMORY";
default: return "CNMEM_STATUS_UNKNOWN_ERROR"; default: return "CNMEM_STATUS_UNKNOWN_ERROR";
...@@ -324,12 +323,6 @@ class Manager { ...@@ -324,12 +323,6 @@ class Manager {
/// To support multi-threading. Each manager has its own mutex. /// To support multi-threading. Each manager has its own mutex.
Mutex mMutex; Mutex mMutex;
public:
/// The root manager for a given device.
static inline Manager& getRootManager(int device) { return getRootManagers()[device]; }
/// The list of all the root managers.
static std::vector<Manager>& getRootManagers();
public: public:
/// Create an unitialized manager. /// Create an unitialized manager.
Manager(); Manager();
...@@ -341,7 +334,7 @@ public: ...@@ -341,7 +334,7 @@ public:
/// Release a block of memory. /// Release a block of memory.
cnmemStatus_t release(void *ptr); cnmemStatus_t release(void *ptr);
/// Release memory. It returns true if we have no memory leak. /// Release memory. It returns true if we have no memory leak.
cnmemStatus_t releaseAllUnsafe(bool &memoryLeak); cnmemStatus_t releaseAllUnsafe();
/// Reserve memory for a manager. /// Reserve memory for a manager.
cnmemStatus_t reserve(std::size_t size); cnmemStatus_t reserve(std::size_t size);
/// Steal memory from another manager. /// Steal memory from another manager.
...@@ -445,8 +438,7 @@ Manager::~Manager() { ...@@ -445,8 +438,7 @@ Manager::~Manager() {
if( mDevice == -1 || cudaSetDevice(mDevice) != cudaSuccess ) { // Invalid device, skip it. if( mDevice == -1 || cudaSetDevice(mDevice) != cudaSuccess ) { // Invalid device, skip it.
return; return;
} }
bool memoryLeak; releaseAllUnsafe();
releaseAllUnsafe(memoryLeak);
mMutex.finalize(); mMutex.finalize();
} }
...@@ -640,13 +632,6 @@ cnmemStatus_t Manager::getNumChildren(std::size_t &numChildren) const { ...@@ -640,13 +632,6 @@ cnmemStatus_t Manager::getNumChildren(std::size_t &numChildren) const {
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
std::vector<Manager>& Manager::getRootManagers() {
static std::vector<Manager> managers;
return managers;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
cnmemStatus_t Manager::giveBlockUnsafe(void *&blockData, std::size_t &blockSize, std::size_t size) { cnmemStatus_t Manager::giveBlockUnsafe(void *&blockData, std::size_t &blockSize, std::size_t size) {
// Make sure the block is not in use any more. It could be too coarse grain and we may change // Make sure the block is not in use any more. It could be too coarse grain and we may change
// it in the future. // it in the future.
...@@ -748,21 +733,15 @@ cnmemStatus_t Manager::release(void *ptr) { ...@@ -748,21 +733,15 @@ cnmemStatus_t Manager::release(void *ptr) {
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
cnmemStatus_t Manager::releaseAllUnsafe(bool &memoryLeaks) { cnmemStatus_t Manager::releaseAllUnsafe() {
// Destroy the children if any. // Destroy the children if any.
bool ok = true;
for( std::size_t i = 0; i < mChildren.size(); ++i ) { for( std::size_t i = 0; i < mChildren.size(); ++i ) {
Manager *child = mChildren[i]; Manager *child = mChildren[i];
bool tmp; CNMEM_CHECK(child->releaseAllUnsafe());
CNMEM_CHECK(child->releaseAllUnsafe(tmp));
ok = ok && !tmp;
delete child; delete child;
} }
mChildren.clear(); mChildren.clear();
// We have some issues when integrating into some libraries. This has to fixed in the libs.
memoryLeaks = !ok || mUsedBlocks;
// Destroy used blocks. It's a kind of panic mode to avoid leaks. NOTE: Do that only with roots!!! // Destroy used blocks. It's a kind of panic mode to avoid leaks. NOTE: Do that only with roots!!!
if( !mParent ) { if( !mParent ) {
while( mUsedBlocks ) { while( mUsedBlocks ) {
...@@ -961,6 +940,101 @@ cnmemStatus_t Manager::stealBlockUnsafe(void *&data, std::size_t &dataSize, ::si ...@@ -961,6 +940,101 @@ cnmemStatus_t Manager::stealBlockUnsafe(void *&data, std::size_t &dataSize, ::si
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
/// Reference-counted holder of the memory managers, reachable through a single global pointer.
class Context {
    /// Magic value written to sCtxCheck when the global context is created.
    enum { CTX_VALID = 0x1f5632a3 };

    /// Number of references currently held on the context.
    int mRefCount;
    /// Mutex taken while the reference counter is modified. TODO: Use atomics.
    Mutex mMutex;
    /// The memory managers (looked up by device index through getManager).
    std::vector<Manager> mManagers;

    /// Pointer to the global context.
    static Context *sCtx;
    /// Set to CTX_VALID by create() and reset to 0 when the context is destroyed by release().
    static int sCtxCheck;

public:
    /// Ctor. The context starts with a single reference and an initialized mutex.
    Context() : mRefCount(1) {
        mMutex.initialize();
    }

    /// Dtor.
    ~Context();

    /// Give access to the whole list of managers.
    std::vector<Manager>& getManagers() {
        return mManagers;
    }

    /// Give access to the manager stored at index i (no bounds checking is performed).
    Manager& getManager(int i) {
        return mManagers[i];
    }

    /// Create the global context.
    static cnmemStatus_t create();

    /// Tell whether the global context was created: the magic number must match and the
    /// global pointer must be non-null.
    static bool check() {
        return sCtxCheck == CTX_VALID && sCtx;
    }

    /// Get the global context.
    static Context* get();

    /// Take one more reference on the global context.
    static cnmemStatus_t retain();

    /// Drop one reference on the global context.
    static cnmemStatus_t release();
};

/// Storage for the global context pointer.
Context *Context::sCtx;
/// Storage for the validity marker of the global context.
int Context::sCtxCheck;
///////////////////////////////////////////////////////////////////////////////////////////////////
/// Release the memory of every valid manager, drop the managers and finalize the mutex.
///
/// The CUDA device that was current on entry is selected again before returning, provided it
/// could be queried in the first place.
Context::~Context() {
    // -1 means "unknown". If cudaGetDevice fails we must not hand an indeterminate device id
    // to cudaSetDevice at the end of the destructor.
    int oldDevice = -1;
    const bool hasOldDevice = cudaGetDevice(&oldDevice) == cudaSuccess;

    for( std::size_t i = 0 ; i < mManagers.size() ; ++i ) {
        if( mManagers[i].getDevice() != -1 ) { // Skip invalid managers.
            cudaSetDevice(mManagers[i].getDevice());
            mManagers[i].releaseAllUnsafe();
        }
    }
    mManagers.clear();
    mMutex.finalize();

    // Restore the caller's device only when we actually know which one it was.
    if( hasOldDevice ) {
        cudaSetDevice(oldDevice);
    }
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/// Allocate a new context, publish it as the global context and mark it as valid.
/// Always returns CNMEM_STATUS_SUCCESS.
cnmemStatus_t Context::create() {
    Context *const created = new Context;
    sCtx = created;
    sCtxCheck = CTX_VALID;
    return CNMEM_STATUS_SUCCESS;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/// Return the pointer to the global context.
/// Context::check() is passed to CNMEM_ASSERT first; the macro is defined outside this view,
/// so presumably it flags a call made while no valid context exists (to be confirmed).
Context* Context::get() {
CNMEM_ASSERT(Context::check());
return Context::sCtx;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/// Add one reference to the global context.
/// The counter is incremented between a lock and an unlock of the context mutex, and
/// CNMEM_STATUS_SUCCESS is returned at the end. sCtx is dereferenced without a null check.
/// NOTE(review): CNMEM_CHECK is defined outside this view; presumably it returns early with
/// the failing status when the mutex call does not succeed -- confirm.
cnmemStatus_t Context::retain() {
CNMEM_CHECK(sCtx->mMutex.lock());
sCtx->mRefCount++;
CNMEM_CHECK(sCtx->mMutex.unlock());
return CNMEM_STATUS_SUCCESS;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/// Drop one reference on the global context. When the counter reaches zero the context is
/// deleted and both the global pointer and the validity marker are reset.
cnmemStatus_t Context::release() {
    // The counter is only touched while the context mutex is held.
    CNMEM_CHECK(sCtx->mMutex.lock());
    const int remaining = --sCtx->mRefCount;
    CNMEM_CHECK(sCtx->mMutex.unlock());

    // Other references are still alive (or the counter is unbalanced): nothing to destroy.
    if( remaining != 0 ) {
        return CNMEM_STATUS_SUCCESS;
    }

    // Kill the context.
    delete sCtx;
    sCtx = NULL;
    sCtxCheck = 0;
    return CNMEM_STATUS_SUCCESS;
}
} // namespace cnmem } // namespace cnmem
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
...@@ -982,10 +1056,14 @@ cnmemStatus_t cnmemInit(int numDevices, const cnmemDevice_t *devices, unsigned f ...@@ -982,10 +1056,14 @@ cnmemStatus_t cnmemInit(int numDevices, const cnmemDevice_t *devices, unsigned f
maxDevice = devices[i].device; maxDevice = devices[i].device;
} }
} }
// Create the global context.
cnmem::Context::create();
cnmem::Context *ctx = cnmem::Context::get();
// Allocate enough managers. // Allocate enough managers.
CNMEM_CHECK_TRUE(maxDevice >= 0, CNMEM_STATUS_INVALID_ARGUMENT); CNMEM_CHECK_TRUE(maxDevice >= 0, CNMEM_STATUS_INVALID_ARGUMENT);
std::vector<cnmem::Manager> &managers = cnmem::Manager::getRootManagers(); std::vector<cnmem::Manager> &managers = ctx->getManagers();
managers.resize(maxDevice+1); managers.resize(maxDevice+1);
// Create a root manager for each device and create the children. // Create a root manager for each device and create the children.
...@@ -1001,7 +1079,7 @@ cnmemStatus_t cnmemInit(int numDevices, const cnmemDevice_t *devices, unsigned f ...@@ -1001,7 +1079,7 @@ cnmemStatus_t cnmemInit(int numDevices, const cnmemDevice_t *devices, unsigned f
} }
CNMEM_CHECK_TRUE(size > 0, CNMEM_STATUS_INVALID_ARGUMENT); CNMEM_CHECK_TRUE(size > 0, CNMEM_STATUS_INVALID_ARGUMENT);
cnmem::Manager &manager = cnmem::Manager::getRootManager(devices[i].device); cnmem::Manager &manager = ctx->getManager(devices[i].device);
manager.setDevice(devices[i].device); manager.setDevice(devices[i].device);
manager.setFlags(flags); manager.setFlags(flags);
...@@ -1026,14 +1104,35 @@ cnmemStatus_t cnmemInit(int numDevices, const cnmemDevice_t *devices, unsigned f ...@@ -1026,14 +1104,35 @@ cnmemStatus_t cnmemInit(int numDevices, const cnmemDevice_t *devices, unsigned f
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
/// Finalize the library: verify that a valid global context exists, then drop one reference
/// on it through cnmem::Context::release() and forward its status.
/// NOTE(review): CNMEM_CHECK_TRUE is defined outside this view; presumably it returns
/// CNMEM_STATUS_NOT_INITIALIZED when the check fails -- confirm.
cnmemStatus_t cnmemFinalize() {
CNMEM_CHECK_TRUE(cnmem::Context::check(), CNMEM_STATUS_NOT_INITIALIZED);
return cnmem::Context::release();
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/// Increase the reference counter of the global context: verify that a valid context exists,
/// then forward to cnmem::Context::retain() and return its status.
/// NOTE(review): CNMEM_CHECK_TRUE is defined outside this view; presumably it returns
/// CNMEM_STATUS_NOT_INITIALIZED when the check fails -- confirm.
cnmemStatus_t cnmemRetain() {
CNMEM_CHECK_TRUE(cnmem::Context::check(), CNMEM_STATUS_NOT_INITIALIZED);
return cnmem::Context::retain();
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/// Decrease the reference counter of the global context: verify that a valid context exists,
/// then forward to cnmem::Context::release() and return its status. This is the same sequence
/// of calls as cnmemFinalize().
/// NOTE(review): CNMEM_CHECK_TRUE is defined outside this view; presumably it returns
/// CNMEM_STATUS_NOT_INITIALIZED when the check fails -- confirm.
cnmemStatus_t cnmemRelease() {
CNMEM_CHECK_TRUE(cnmem::Context::check(), CNMEM_STATUS_NOT_INITIALIZED);
return cnmem::Context::release();
}
///////////////////////////////////////////////////////////////////////////////////////////////////
cnmemStatus_t cnmemRegisterStream(cudaStream_t stream) { cnmemStatus_t cnmemRegisterStream(cudaStream_t stream) {
CNMEM_CHECK_TRUE(!cnmem::Manager::getRootManagers().empty(), CNMEM_STATUS_NOT_INITIALIZED); CNMEM_CHECK_TRUE(cnmem::Context::check(), CNMEM_STATUS_NOT_INITIALIZED);
CNMEM_CHECK_TRUE(stream, CNMEM_STATUS_INVALID_ARGUMENT); CNMEM_CHECK_TRUE(stream, CNMEM_STATUS_INVALID_ARGUMENT);
int device; int device;
CNMEM_CHECK_CUDA(cudaGetDevice(&device)); CNMEM_CHECK_CUDA(cudaGetDevice(&device));
cnmem::Manager &root = cnmem::Manager::getRootManager(device); cnmem::Manager &root = cnmem::Context::get()->getManager(device);
cnmem::Manager *child = new cnmem::Manager; cnmem::Manager *child = new cnmem::Manager;
child->setParent(&root); child->setParent(&root);
child->setDevice(device); child->setDevice(device);
...@@ -1043,39 +1142,24 @@ cnmemStatus_t cnmemRegisterStream(cudaStream_t stream) { ...@@ -1043,39 +1142,24 @@ cnmemStatus_t cnmemRegisterStream(cudaStream_t stream) {
return CNMEM_STATUS_SUCCESS; return CNMEM_STATUS_SUCCESS;
} }
///////////////////////////////////////////////////////////////////////////////////////////////////
cnmemStatus_t cnmemFinalize() {
CNMEM_CHECK_TRUE(!cnmem::Manager::getRootManagers().empty(), CNMEM_STATUS_NOT_INITIALIZED);
int oldDevice;
CNMEM_CHECK_CUDA(cudaGetDevice(&oldDevice));
std::vector<cnmem::Manager> &managers = cnmem::Manager::getRootManagers();
bool memoryLeaks = false;
for( std::size_t i = 0; i < managers.size(); ++i ) {
CNMEM_CHECK_CUDA(cudaSetDevice(managers[i].getDevice()));
bool tmpLeaks;
CNMEM_CHECK(managers[i].releaseAllUnsafe(tmpLeaks));
memoryLeaks = memoryLeaks || tmpLeaks;
}
managers.clear();
CNMEM_CHECK_CUDA(cudaSetDevice(oldDevice));
return memoryLeaks ? CNMEM_STATUS_MEMORY_LEAK : CNMEM_STATUS_SUCCESS;
}
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
cnmemStatus_t cnmemMalloc(void **ptr, std::size_t size, cudaStream_t stream) { cnmemStatus_t cnmemMalloc(void **ptr, std::size_t size, cudaStream_t stream) {
CNMEM_CHECK_TRUE(!cnmem::Manager::getRootManagers().empty(), CNMEM_STATUS_NOT_INITIALIZED); CNMEM_CHECK_TRUE(cnmem::Context::check(), CNMEM_STATUS_NOT_INITIALIZED);
if( !ptr && !size ) if( !ptr && !size ) {
return CNMEM_STATUS_SUCCESS;
}
else if( !size ) {
ptr[0] = NULL;
return CNMEM_STATUS_SUCCESS; return CNMEM_STATUS_SUCCESS;
}
CNMEM_CHECK_TRUE(ptr, CNMEM_STATUS_INVALID_ARGUMENT); CNMEM_CHECK_TRUE(ptr, CNMEM_STATUS_INVALID_ARGUMENT);
CNMEM_CHECK_TRUE(size, CNMEM_STATUS_INVALID_ARGUMENT);
int device; int device;
CNMEM_CHECK_CUDA(cudaGetDevice(&device)); CNMEM_CHECK_CUDA(cudaGetDevice(&device));
cnmem::Manager &root = cnmem::Manager::getRootManager(device); cnmem::Manager &root = cnmem::Context::get()->getManager(device);
cnmem::Manager *manager = &root; cnmem::Manager *manager = &root;
if( stream ) { if( stream ) {
CNMEM_CHECK(root.getChildFromStream(manager, stream)); CNMEM_CHECK(root.getChildFromStream(manager, stream));
...@@ -1137,7 +1221,7 @@ cnmemStatus_t cnmemMalloc(void **ptr, std::size_t size, cudaStream_t stream) { ...@@ -1137,7 +1221,7 @@ cnmemStatus_t cnmemMalloc(void **ptr, std::size_t size, cudaStream_t stream) {
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
cnmemStatus_t cnmemFree(void *ptr, cudaStream_t stream) { cnmemStatus_t cnmemFree(void *ptr, cudaStream_t stream) {
CNMEM_CHECK_TRUE(!cnmem::Manager::getRootManagers().empty(), CNMEM_STATUS_NOT_INITIALIZED); CNMEM_CHECK_TRUE(cnmem::Context::check(), CNMEM_STATUS_NOT_INITIALIZED);
if( ptr == NULL ) { if( ptr == NULL ) {
return CNMEM_STATUS_SUCCESS; return CNMEM_STATUS_SUCCESS;
} }
...@@ -1145,7 +1229,7 @@ cnmemStatus_t cnmemFree(void *ptr, cudaStream_t stream) { ...@@ -1145,7 +1229,7 @@ cnmemStatus_t cnmemFree(void *ptr, cudaStream_t stream) {
int device; int device;
CNMEM_CHECK_CUDA(cudaGetDevice(&device)); CNMEM_CHECK_CUDA(cudaGetDevice(&device));
cnmem::Manager &root = cnmem::Manager::getRootManager(device); cnmem::Manager &root = cnmem::Context::get()->getManager(device);
cnmem::Manager *manager = &root; cnmem::Manager *manager = &root;
if( stream ) { if( stream ) {
CNMEM_CHECK(root.getChildFromStream(manager, stream)); CNMEM_CHECK(root.getChildFromStream(manager, stream));
...@@ -1157,12 +1241,12 @@ cnmemStatus_t cnmemFree(void *ptr, cudaStream_t stream) { ...@@ -1157,12 +1241,12 @@ cnmemStatus_t cnmemFree(void *ptr, cudaStream_t stream) {
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
cnmemStatus_t cnmemMemGetInfo(size_t *freeMem, size_t *totalMem, cudaStream_t stream) { cnmemStatus_t cnmemMemGetInfo(size_t *freeMem, size_t *totalMem, cudaStream_t stream) {
CNMEM_CHECK_TRUE(!cnmem::Manager::getRootManagers().empty(), CNMEM_STATUS_NOT_INITIALIZED); CNMEM_CHECK_TRUE(cnmem::Context::check(), CNMEM_STATUS_NOT_INITIALIZED);
CNMEM_CHECK_TRUE(totalMem && freeMem, CNMEM_STATUS_INVALID_ARGUMENT); CNMEM_CHECK_TRUE(totalMem && freeMem, CNMEM_STATUS_INVALID_ARGUMENT);
int device; int device;
CNMEM_CHECK_CUDA(cudaGetDevice(&device)); CNMEM_CHECK_CUDA(cudaGetDevice(&device));
cnmem::Manager &root = cnmem::Manager::getRootManager(device); cnmem::Manager &root = cnmem::Context::get()->getManager(device);
cnmem::Manager *manager = &root; cnmem::Manager *manager = &root;
if( stream ) { if( stream ) {
CNMEM_CHECK(root.getChildFromStream(manager, stream)); CNMEM_CHECK(root.getChildFromStream(manager, stream));
...@@ -1182,11 +1266,11 @@ cnmemStatus_t cnmemMemGetInfo(size_t *freeMem, size_t *totalMem, cudaStream_t st ...@@ -1182,11 +1266,11 @@ cnmemStatus_t cnmemMemGetInfo(size_t *freeMem, size_t *totalMem, cudaStream_t st
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
cnmemStatus_t cnmemPrintMemoryState(FILE *file, cudaStream_t stream) { cnmemStatus_t cnmemPrintMemoryState(FILE *file, cudaStream_t stream) {
CNMEM_CHECK_TRUE(!cnmem::Manager::getRootManagers().empty(), CNMEM_STATUS_NOT_INITIALIZED); CNMEM_CHECK_TRUE(cnmem::Context::check(), CNMEM_STATUS_NOT_INITIALIZED);
int device; int device;
CNMEM_CHECK_CUDA(cudaGetDevice(&device)); CNMEM_CHECK_CUDA(cudaGetDevice(&device));
cnmem::Manager &root = cnmem::Manager::getRootManager(device); cnmem::Manager &root = cnmem::Context::get()->getManager(device);
cnmem::Manager *manager = &root; cnmem::Manager *manager = &root;
if( stream ) { if( stream ) {
CNMEM_CHECK(root.getChildFromStream(manager, stream)); CNMEM_CHECK(root.getChildFromStream(manager, stream));
......
...@@ -57,7 +57,6 @@ typedef enum ...@@ -57,7 +57,6 @@ typedef enum
CNMEM_STATUS_SUCCESS = 0, CNMEM_STATUS_SUCCESS = 0,
CNMEM_STATUS_CUDA_ERROR, CNMEM_STATUS_CUDA_ERROR,
CNMEM_STATUS_INVALID_ARGUMENT, CNMEM_STATUS_INVALID_ARGUMENT,
CNMEM_STATUS_MEMORY_LEAK,
CNMEM_STATUS_NOT_INITIALIZED, CNMEM_STATUS_NOT_INITIALIZED,
CNMEM_STATUS_OUT_OF_MEMORY, CNMEM_STATUS_OUT_OF_MEMORY,
CNMEM_STATUS_UNKNOWN_ERROR CNMEM_STATUS_UNKNOWN_ERROR
...@@ -109,29 +108,58 @@ typedef struct cnmemDevice_t_ ...@@ -109,29 +108,58 @@ typedef struct cnmemDevice_t_
cnmemStatus_t CNMEM_API cnmemInit(int numDevices, const cnmemDevice_t *devices, unsigned flags); cnmemStatus_t CNMEM_API cnmemInit(int numDevices, const cnmemDevice_t *devices, unsigned flags);
/** /**
* \brief Add a new stream to the pool of managed streams on a device. * \brief Release all the allocated memory.
*
* This function must be called by a single thread and after all threads that called
* cnmemMalloc/cnmemFree have joined. This function is not thread-safe.
* *
* This function registers a new stream into a device memory manager. It is thread-safe. * \return
* CNMEM_STATUS_SUCCESS, if everything goes fine,
* CNMEM_STATUS_NOT_INITIALIZED, if the ::cnmemInit function has not been called,
* CNMEM_STATUS_CUDA_ERROR, if an error happens in one of the CUDA functions.
*/
cnmemStatus_t CNMEM_API cnmemFinalize();
/**
* \brief Increase the internal reference counter of the context object.
*
* This function increases the internal reference counter of the library. The purpose of that
* reference counting mechanism is to give more control to the user over the lifetime of the
* library. It is useful with scoped memory allocation which may be destroyed in a final
* memory collection after the end of main(). That function is thread-safe.
* *
* \return * \return
* CNMEM_STATUS_SUCCESS, if everything goes fine, * CNMEM_STATUS_SUCCESS, if everything goes fine,
* CNMEM_STATUS_INVALID_ARGUMENT, if one of the argument is invalid, * CNMEM_STATUS_NOT_INITIALIZED, if the ::cnmemInit function has not been called,
*/ */
cnmemStatus_t CNMEM_API cnmemRegisterStream(cudaStream_t stream); cnmemStatus_t CNMEM_API cnmemRetain();
/** /**
* \brief Release all the allocated memory. * \brief Decrease the internal reference counter of the context object.
* *
* This function must be called by a single thread and after all threads that called * This function decreases the internal reference counter of the library. The purpose of that
* cnmemMalloc/cnmemFree have joined. This function is not thread-safe. * reference counting mechanism is to give more control to the user over the lifetime of the
* library. It is useful with scoped memory allocation which may be destroyed in a final
* memory collection after the end of main(). That function is thread-safe.
*
* You can use \c cnmemRelease to explicitly finalize the library.
* *
* \return * \return
* CNMEM_STATUS_SUCCESS, if everything goes fine, * CNMEM_STATUS_SUCCESS, if everything goes fine,
* CNMEM_STATUS_NOT_INITIALIZED, if the ::cnmemInit function has not been called, * CNMEM_STATUS_NOT_INITIALIZED, if the ::cnmemInit function has not been called,
* CNMEM_STATUS_MEMORY_LEAK, if there are unreleased blocks in the memory queues,
* CNMEM_STATUS_CUDA_ERROR, if an error happens in one of the CUDA functions.
*/ */
cnmemStatus_t CNMEM_API cnmemFinalize(); cnmemStatus_t CNMEM_API cnmemRelease();
/**
* \brief Add a new stream to the pool of managed streams on a device.
*
* This function registers a new stream into a device memory manager. It is thread-safe.
*
* \return
* CNMEM_STATUS_SUCCESS, if everything goes fine,
* CNMEM_STATUS_INVALID_ARGUMENT, if one of the argument is invalid,
*/
cnmemStatus_t CNMEM_API cnmemRegisterStream(cudaStream_t stream);
/** /**
* \brief Allocate memory. * \brief Allocate memory.
......
...@@ -138,9 +138,7 @@ void * device_malloc(size_t size, int verbose) ...@@ -138,9 +138,7 @@ void * device_malloc(size_t size, int verbose)
///@TODO: thejaswi: support for multiple-streams? ///@TODO: thejaswi: support for multiple-streams?
if(g_use_cnmem) { if(g_use_cnmem) {
cnmemStatus_t status = CNMEM_STATUS_SUCCESS; cnmemStatus_t status = CNMEM_STATUS_SUCCESS;
if( size != 0 ) { status = cnmemMalloc(&rval, size, NULL);
status = cnmemMalloc(&rval, size, NULL);
}
if(status != CNMEM_STATUS_SUCCESS) { if(status != CNMEM_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, PyErr_Format(PyExc_MemoryError,
"Error allocating %zd bytes of device memory (%s).", "Error allocating %zd bytes of device memory (%s).",
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论