提交 29a43a50 authored 作者: Frederic Bastien's avatar Frederic Bastien

merge

import atexit, gc, os, stat
import atexit, os, stat
from theano.compile import optdb
from theano import config
......@@ -96,9 +96,6 @@ if cuda_available:
cuda_initialization_error_message = ""
# actively closing our gpu session presents segfault-on-exit on some systems
atexit.register(gpu_shutdown)
# do garbage collection before releasing the gpu to avoid releasing invalid pointers later
# note that atexit-registered calls are called in LIFO order
atexit.register(gc.collect)
except EnvironmentError, e:
cuda_available = False
cuda_initialization_error_message = e.message
......
......@@ -12,11 +12,43 @@
//If true, we fill with NAN allocated device memory.
#define ALLOC_MEMSET 0
#define DEBUG_GPU_CONTEXT_REFCOUNT 0
// g_gpu_context_refcount starts at one b/c the gpu context will be implicitly created
// on the first successful cuda call. the matching decref is in CudaNdarray_gpu_shutdown.
static int g_gpu_context_refcount = 1;
///////////////////////////
// cuda context management
///////////////////////////
void gpu_context_incref() {
g_gpu_context_refcount++;
#if DEBUG_GPU_CONTEXT_REFCOUNT
fprintf(stderr, "gpu_context_incref, to %d\n", g_gpu_context_refcount);
#endif
}
void gpu_context_decref() {
g_gpu_context_refcount--;
#if DEBUG_GPU_CONTEXT_REFCOUNT
fprintf(stderr, "gpu_context_decref, to %d\n", g_gpu_context_refcount);
#endif
if(g_gpu_context_refcount == 0) {
// we're now free to close the cuda context; if we don't explicitly
// exit our cuda context, some systems segfault on process exit
// for as-yet unknown reasons; see
// http://groups.google.com/group/theano-users/browse_thread/thread/c351846e5cebe35f
cudaThreadExit();
#if DEBUG_GPU_CONTEXT_REFCOUNT
fprintf(stderr, "gpu_context_decref at 0, calling cudaThreadExit\n");
#endif
}
}
/////////////////////////
// Alloc and Free
/////////////////////////
/**
*
* In the test program I'm using, the _outstanding_mallocs decreases with every call.
......@@ -48,6 +80,9 @@ void * device_malloc(size_t size)
return NULL;
}
_outstanding_mallocs[0] += (rval != NULL);
if(rval != NULL) {
gpu_context_incref(); // keep the gpu context around until we've free this memory
}
#if COMPUTE_GPU_MEM_USED
for(int i=0;i<TABLE_SIZE;i++){
if(NULL==_alloc_size_table[i].ptr){
......@@ -81,6 +116,9 @@ int device_free(void *ptr)
return -1;
}
_outstanding_mallocs[0] -= (ptr != NULL);
if(ptr != NULL) {
gpu_context_decref();
}
#if COMPUTE_GPU_MEM_USED
int i=0;
for(;i<TABLE_SIZE;i++)
......@@ -1888,7 +1926,7 @@ CudaNdarray_gpu_init(PyObject* _unused, PyObject* args)
PyObject *
CudaNdarray_gpu_shutdown(PyObject* _unused, PyObject* _unused_args) {
cudaThreadExit();
gpu_context_decref();
Py_INCREF(Py_None);
return Py_None;
}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论