Commit 29a43a50 authored by Frederic Bastien

merge

import atexit, gc, os, stat import atexit, os, stat
from theano.compile import optdb from theano.compile import optdb
from theano import config from theano import config
...@@ -96,9 +96,6 @@ if cuda_available: ...@@ -96,9 +96,6 @@ if cuda_available:
cuda_initialization_error_message = "" cuda_initialization_error_message = ""
# actively closing our gpu session presents segfault-on-exit on some systems # actively closing our gpu session presents segfault-on-exit on some systems
atexit.register(gpu_shutdown) atexit.register(gpu_shutdown)
# do garbage collection before releasing the gpu to avoid releasing invalid pointers later
# note that atexit-registered calls are called in LIFO order
atexit.register(gc.collect)
except EnvironmentError, e: except EnvironmentError, e:
cuda_available = False cuda_available = False
cuda_initialization_error_message = e.message cuda_initialization_error_message = e.message
......
...@@ -12,11 +12,43 @@ ...@@ -12,11 +12,43 @@
//If true, we fill with NAN allocated device memory. //If true, we fill with NAN allocated device memory.
#define ALLOC_MEMSET 0 #define ALLOC_MEMSET 0
#define DEBUG_GPU_CONTEXT_REFCOUNT 0
// g_gpu_context_refcount starts at one b/c the gpu context will be implicitly created
// on the first successful cuda call. the matching decref is in CudaNdarray_gpu_shutdown.
static int g_gpu_context_refcount = 1;
///////////////////////////
// cuda context management
///////////////////////////
// Take one reference on the CUDA context so it stays alive while the
// holder (e.g. an outstanding device allocation) still needs it.
// Matched by a later gpu_context_decref().
void gpu_context_incref() {
    ++g_gpu_context_refcount;
#if DEBUG_GPU_CONTEXT_REFCOUNT
    fprintf(stderr, "gpu_context_incref, to %d\n", g_gpu_context_refcount);
#endif
}
// Drop one reference on the CUDA context. When the count reaches zero the
// context is torn down explicitly via cudaThreadExit(): on some systems,
// letting the process exit without closing the context segfaults; see
// http://groups.google.com/group/theano-users/browse_thread/thread/c351846e5cebe35f
void gpu_context_decref() {
    --g_gpu_context_refcount;
#if DEBUG_GPU_CONTEXT_REFCOUNT
    fprintf(stderr, "gpu_context_decref, to %d\n", g_gpu_context_refcount);
#endif
    // Guard clause: anyone still holding a reference keeps the context open.
    if (g_gpu_context_refcount != 0)
        return;
    cudaThreadExit();
#if DEBUG_GPU_CONTEXT_REFCOUNT
    fprintf(stderr, "gpu_context_decref at 0, calling cudaThreadExit\n");
#endif
}
///////////////////////// /////////////////////////
// Alloc and Free // Alloc and Free
///////////////////////// /////////////////////////
/** /**
* *
* In the test program I'm using, the _outstanding_mallocs decreases with every call. * In the test program I'm using, the _outstanding_mallocs decreases with every call.
...@@ -48,6 +80,9 @@ void * device_malloc(size_t size) ...@@ -48,6 +80,9 @@ void * device_malloc(size_t size)
return NULL; return NULL;
} }
_outstanding_mallocs[0] += (rval != NULL); _outstanding_mallocs[0] += (rval != NULL);
if(rval != NULL) {
gpu_context_incref(); // keep the gpu context around until we've free this memory
}
#if COMPUTE_GPU_MEM_USED #if COMPUTE_GPU_MEM_USED
for(int i=0;i<TABLE_SIZE;i++){ for(int i=0;i<TABLE_SIZE;i++){
if(NULL==_alloc_size_table[i].ptr){ if(NULL==_alloc_size_table[i].ptr){
...@@ -81,6 +116,9 @@ int device_free(void *ptr) ...@@ -81,6 +116,9 @@ int device_free(void *ptr)
return -1; return -1;
} }
_outstanding_mallocs[0] -= (ptr != NULL); _outstanding_mallocs[0] -= (ptr != NULL);
if(ptr != NULL) {
gpu_context_decref();
}
#if COMPUTE_GPU_MEM_USED #if COMPUTE_GPU_MEM_USED
int i=0; int i=0;
for(;i<TABLE_SIZE;i++) for(;i<TABLE_SIZE;i++)
...@@ -1888,7 +1926,7 @@ CudaNdarray_gpu_init(PyObject* _unused, PyObject* args) ...@@ -1888,7 +1926,7 @@ CudaNdarray_gpu_init(PyObject* _unused, PyObject* args)
PyObject * PyObject *
CudaNdarray_gpu_shutdown(PyObject* _unused, PyObject* _unused_args) { CudaNdarray_gpu_shutdown(PyObject* _unused, PyObject* _unused_args) {
cudaThreadExit(); gpu_context_decref();
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
} }
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment