提交 6e108bc4 authored 作者: Frederic's avatar Frederic

In the memory profile, print the max memory used on the GPU.

上级 da995a27
...@@ -659,6 +659,14 @@ class ProfileStats(object): ...@@ -659,6 +659,14 @@ class ProfileStats(object):
node_memory_saved_by_inplace / 1024.)) node_memory_saved_by_inplace / 1024.))
print " Memory saved by GC (KB)", int(round(( print " Memory saved by GC (KB)", int(round((
node_memory_size - running_max_memory_size) / 1024.)) node_memory_size - running_max_memory_size) / 1024.))
if (hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and
hasattr(theano.sandbox.cuda, 'cuda_ndarray') and
hasattr(theano.sandbox.cuda.cuda_ndarray.cuda_ndarray,
'theano_allocated')):
_, gpu_max = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.theano_allocated()
print " Max Memory allocated on the GPU(for all functions) (KB)", int(round(
gpu_max / 1024.))
print print
print " <Sum apply outputs (bytes)> <Apply outputs shape> <created/inplace/view> <Apply node>" print " <Sum apply outputs (bytes)> <Apply outputs shape> <created/inplace/view> <Apply node>"
......
...@@ -44,8 +44,11 @@ static PyObject *CudaNdarray_get_shape(CudaNdarray *self, void *closure); ...@@ -44,8 +44,11 @@ static PyObject *CudaNdarray_get_shape(CudaNdarray *self, void *closure);
* *
*/ */
int _outstanding_mallocs[] = {0,0}; int _outstanding_mallocs[] = {0,0};
#if COMPUTE_GPU_MEM_USED #if COMPUTE_GPU_MEM_USED
int _allocated_size = 0; int _allocated_size = 0;
int _max_allocated_size = 0;
const int TABLE_SIZE = 10000; const int TABLE_SIZE = 10000;
struct table_struct{ struct table_struct{
void* ptr; void* ptr;
...@@ -82,8 +85,15 @@ void * device_malloc(size_t size, int verbose) ...@@ -82,8 +85,15 @@ void * device_malloc(size_t size, int verbose)
"Error allocating %li bytes of device memory (%s).", (long)size, cudaGetErrorString(err)); "Error allocating %li bytes of device memory (%s).", (long)size, cudaGetErrorString(err));
return NULL; return NULL;
} }
_outstanding_mallocs[0] += (rval != NULL); if (rval != NULL){
#if COMPUTE_GPU_MEM_USED // Can cudaMalloc return cudaSuccess but still hand back a NULL pointer?
// Could that be what happens when size is 0?
_outstanding_mallocs[0] += 1;
#if COMPUTE_GPU_MEM_USED
_allocated_size += size;
_max_allocated_size = std::max(_max_allocated_size, _allocated_size);
for(int i=0;i<TABLE_SIZE;i++){ for(int i=0;i<TABLE_SIZE;i++){
if(NULL==_alloc_size_table[i].ptr){ if(NULL==_alloc_size_table[i].ptr){
_alloc_size_table[i].ptr=rval; _alloc_size_table[i].ptr=rval;
...@@ -91,8 +101,8 @@ void * device_malloc(size_t size, int verbose) ...@@ -91,8 +101,8 @@ void * device_malloc(size_t size, int verbose)
break; break;
} }
} }
_allocated_size += size; #endif
#endif }
//fprintf(stderr, //fprintf(stderr,
//"allocated %li bytes of device memory (%s). new total bytes allocated: %d. ptr: %p\n", //"allocated %li bytes of device memory (%s). new total bytes allocated: %d. ptr: %p\n",
//(long)size, cudaGetErrorString(err),_allocated_size,rval); //(long)size, cudaGetErrorString(err),_allocated_size,rval);
...@@ -2517,6 +2527,7 @@ CudaNdarray_synchronize(PyObject* _unused, PyObject* dummy) ...@@ -2517,6 +2527,7 @@ CudaNdarray_synchronize(PyObject* _unused, PyObject* dummy)
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
} }
#if COMPUTE_GPU_MEM_USED #if COMPUTE_GPU_MEM_USED
/* /*
* Return the size in bytes that Theano currently has allocated on the GPU. * Return the size in bytes that Theano currently has allocated on the GPU.
...@@ -2524,7 +2535,8 @@ CudaNdarray_synchronize(PyObject* _unused, PyObject* dummy) ...@@ -2524,7 +2535,8 @@ CudaNdarray_synchronize(PyObject* _unused, PyObject* dummy)
PyObject * PyObject *
GetTheanoAllocInfo(PyObject* _unused, PyObject* dummy) GetTheanoAllocInfo(PyObject* _unused, PyObject* dummy)
{ {
return PyLong_FromLong(_allocated_size); PyObject* tuple = Py_BuildValue("(ii)", _allocated_size, _max_allocated_size);
return tuple;
} }
#endif #endif
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请先 注册 或者 登录 后发表评论