提交 629c173b 作者: David Warde-Farley

Added a verbosity flag to device_malloc.

上级 e41ba9bd
...@@ -53,7 +53,7 @@ struct table_struct{ ...@@ -53,7 +53,7 @@ struct table_struct{
}; };
table_struct _alloc_size_table[TABLE_SIZE]; table_struct _alloc_size_table[TABLE_SIZE];
#endif #endif
void * device_malloc(size_t size) void * device_malloc(size_t size, int verbose)
{ {
void * rval=NULL; void * rval=NULL;
cudaError_t err = cudaMalloc(&rval, size); cudaError_t err = cudaMalloc(&rval, size);
...@@ -64,11 +64,14 @@ void * device_malloc(size_t size) ...@@ -64,11 +64,14 @@ void * device_malloc(size_t size)
// it returns something else I still don't see why we should ignore // it returns something else I still don't see why we should ignore
// it. All we want to do here is reset the flag. // it. All we want to do here is reset the flag.
cudaGetLastError(); cudaGetLastError();
if (verbose)
{
#if COMPUTE_GPU_MEM_USED #if COMPUTE_GPU_MEM_USED
fprintf(stderr, "Error allocating %li bytes of device memory (%s). new total bytes allocated: %d\n", (long)size, cudaGetErrorString(err),_allocated_size); fprintf(stderr, "Error allocating %li bytes of device memory (%s). new total bytes allocated: %d\n", (long)size, cudaGetErrorString(err),_allocated_size);
#else #else
fprintf(stderr, "Error allocating %li bytes of device memory (%s).\n", (long)size, cudaGetErrorString(err)); fprintf(stderr, "Error allocating %li bytes of device memory (%s).\n", (long)size, cudaGetErrorString(err));
#endif #endif
}
PyErr_Format(PyExc_MemoryError, PyErr_Format(PyExc_MemoryError,
"Error allocating %li bytes of device memory (%s).", (long)size, cudaGetErrorString(err)); "Error allocating %li bytes of device memory (%s).", (long)size, cudaGetErrorString(err));
return NULL; return NULL;
...@@ -959,7 +962,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){ ...@@ -959,7 +962,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
// Create the memory place that will store the error information. // Create the memory place that will store the error information.
if (err_var == NULL) { if (err_var == NULL) {
err_var = (int*)device_malloc(sizeof(int)); err_var = (int*)device_malloc(sizeof(int), VERBOSE_DEVICE_MALLOC);
if (!err_var) { // PyErr set by device_malloc if (!err_var) { // PyErr set by device_malloc
Py_DECREF(indices); Py_DECREF(indices);
Py_DECREF(out); Py_DECREF(out);
...@@ -2625,7 +2628,7 @@ static __global__ void get_gpu_ptr_size(int* dst) ...@@ -2625,7 +2628,7 @@ static __global__ void get_gpu_ptr_size(int* dst)
PyObject * PyObject *
CudaNdarray_ptr_int_size(PyObject* _unused, PyObject* args) CudaNdarray_ptr_int_size(PyObject* _unused, PyObject* args)
{ {
int *gpu_data = (int*)device_malloc(sizeof(int)*2); int *gpu_data = (int*)device_malloc(sizeof(int)*2, VERBOSE_DEVICE_MALLOC);
if(gpu_data == NULL){ if(gpu_data == NULL){
return PyErr_Format(PyExc_MemoryError, return PyErr_Format(PyExc_MemoryError,
"CudaNdarray_ptr_int_size: Can't allocate memory on the gpu."); "CudaNdarray_ptr_int_size: Can't allocate memory on the gpu.");
...@@ -4521,7 +4524,7 @@ cnda_copy_structure_to_device(const CudaNdarray * self) ...@@ -4521,7 +4524,7 @@ cnda_copy_structure_to_device(const CudaNdarray * self)
int struct_size = cnda_structure_size(self->nd); int struct_size = cnda_structure_size(self->nd);
if (struct_size) if (struct_size)
{ {
self->dev_structure = (int*)device_malloc(struct_size* sizeof(int)); self->dev_structure = (int*)device_malloc(struct_size* sizeof(int), VERBOSE_DEVICE_MALLOC);
if (NULL == self->dev_structure) if (NULL == self->dev_structure)
{ {
return -1; return -1;
......
...@@ -42,13 +42,16 @@ typedef float real; ...@@ -42,13 +42,16 @@ typedef float real;
#define SHARED_SIZE (16*1024) #define SHARED_SIZE (16*1024)
#endif #endif
#define VERBOSE_DEVICE_MALLOC 1
#define NO_VERBOSE_DEVICE_MALLOC 0
/** /**
* Allocation and freeing of device memory should go through these functions so that the lib can track memory usage. * Allocation and freeing of device memory should go through these functions so that the lib can track memory usage.
* *
* device_malloc will set the Python error message before returning NULL. * device_malloc will set the Python error message before returning NULL.
* device_free will return nonzero on failure (after setting the python error message) * device_free will return nonzero on failure (after setting the python error message)
*/ */
DllExport void * device_malloc(size_t size); DllExport void * device_malloc(size_t size, int verbose);
DllExport int device_free(void * ptr); DllExport int device_free(void * ptr);
template <typename T> template <typename T>
...@@ -335,7 +338,7 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const i ...@@ -335,7 +338,7 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const i
return -1; return -1;
} }
self->devdata = (float*)device_malloc(size*sizeof(real)); self->devdata = (float*)device_malloc(size*sizeof(real), VERBOSE_DEVICE_MALLOC);
if (size && !self->devdata) if (size && !self->devdata)
{ {
CudaNdarray_set_nd(self, -1); CudaNdarray_set_nd(self, -1);
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论