Update lib.cnmem to be a float: (0, 1] is a fraction of GPU memory, > 1 is a size in MB

ed1fca7a · Frederic · 840f1011 · ed1fca7a · ed1fca7a
--- a/theano/sandbox/cuda/__init__.py
+++ b/theano/sandbox/cuda/__init__.py
@@ -14,7 +14,7 @@ from theano.gof import EquilibriumDB, SequenceDB
 from theano.gof.cmodule import get_lib_extension
 from theano.gof.compilelock import get_lock, release_lock
 from theano.configparser import (
-    config, AddConfigVar, BoolParam, IntParam, StrParam)
+    config, AddConfigVar, BoolParam, FloatParam, StrParam)
 from . import nvcc_compiler

 # ignore_newtrees is to speed the optimization as this is the pattern
@@ -56,16 +56,18 @@ AddConfigVar('cublas.lib',
        StrParam('cublas'))

 AddConfigVar('lib.cnmem',
-             """Do we enable CNMeM or not (a faster memory allocator).
+             """Do we enable CNMeM or not (a faster CUDA memory allocator).

-             The number (in MB) represent the start size of the memory pool.
+             The parameter represents the start size (in MB or as a
+             fraction of total GPU memory) of the memory pool.

             0: not enabled.
-             -1: use half GPU memory.
-             >0: use that number of MB of memory.""",
+             0 < N <= 1: fraction of the total GPU memory (clipped to .985 for driver memory)
+             > 1: use that number of MB of memory.
+
+             """,
             # We should not mix both allocator, so we can't override
-             # BoolParam(False, allow_override=False),
-             IntParam(0, lambda i: i >= 0 or i == -1, allow_override=False),
+             FloatParam(0, lambda i: i >= 0, allow_override=False),
             in_c_key=False)

 # is_nvcc_available called here to initialize global vars in

--- a/theano/sandbox/cuda/cuda_ndarray.cu
+++ b/theano/sandbox/cuda/cuda_ndarray.cu
@@ -3141,9 +3141,9 @@ CudaNdarray_gpu_init(PyObject* _unused, PyObject* args)
 {
    int card_nb = 0;
    int card_number_provided = 1;
-    int cnmem = 0; // start qt memory in MB.
+    float cnmem = 0; // Theano flag lib.cnmem
    // if we're given something wildly invalid, this will throw a TypeError
-    PyArg_ParseTuple(args, "|ii", &card_nb, &cnmem);
+    PyArg_ParseTuple(args, "|if", &card_nb, &cnmem);
    if(cnmem)
        g_use_cnmem = true;

@@ -3202,13 +3202,27 @@ CudaNdarray_gpu_init(PyObject* _unused, PyObject* args)
    }
    if(card_number_provided && g_use_cnmem) {
        size_t mem = 0;
-        if (cnmem > 0)
+        if (cnmem > 1)
            mem = cnmem * 1024 * 1024;
-        else if (cnmem != -1){
-            return PyErr_Format(
-                PyExc_EnvironmentError,
-                "CNMeM init: The config flag must be 0 (disabled),"
-                " -1: use half the GPU memory, > 0: that memory in MB.");
+        else{
+            // Clip to 98.5% to leave memory for the driver.
+            if (cnmem > .985){
+                cnmem = .985;
+            }
+            size_t free = 0, total = 0;
+            cudaError_t err = cudaMemGetInfo(&free, &total);
+            if (err != cudaSuccess){
+                // Clear the error flag, cudaMemGetInfo doesn't do it.
+                // Currently this returns the same thing as err, but if in future
+                // it returns something else I still don't see why we should ignore
+                // it.  All we want to do here is reset the flag.
+                cudaGetLastError();
+                PyErr_Format(PyExc_RuntimeError,
+                             "Error while getting memory info about the gpu: %s",
+                             cudaGetErrorString(err));
+                return NULL;
+            }
+            mem = total * cnmem;
        }
        if(initCnmem(card_number_provided, card_nb, mem) == -1){
            return NULL;