提交 4345523c authored 作者: Frederic's avatar Frederic

Add a Theano flag to enable cumem

上级 4ae044bf
...@@ -54,6 +54,12 @@ AddConfigVar('cublas.lib', ...@@ -54,6 +54,12 @@ AddConfigVar('cublas.lib',
"""Name of the cuda blas library for the linker.""", """Name of the cuda blas library for the linker.""",
StrParam('cublas')) StrParam('cublas'))
# New config flag: whether to use the cumem GPU memory allocator.
AddConfigVar('lib.cumem',
"""Do we enable cumem or not.""",
# We should not mix both allocators, so overriding this flag after
# startup is disallowed.
BoolParam(False, allow_override=False),
# Excluded from the C compilation key (in_c_key=False): toggling the
# allocator does not require recompiling generated C code.
in_c_key=False)
# is_nvcc_available called here to initialize global vars in # is_nvcc_available called here to initialize global vars in
# nvcc_compiler module # nvcc_compiler module
nvcc_compiler.is_nvcc_available() nvcc_compiler.is_nvcc_available()
...@@ -377,7 +383,7 @@ def use(device, ...@@ -377,7 +383,7 @@ def use(device,
try: try:
if (device != 'gpu') and not pycuda_init_dev: if (device != 'gpu') and not pycuda_init_dev:
assert isinstance(device, int) assert isinstance(device, int)
gpu_init(device) gpu_init(device, config.lib.cumem)
use.device_number = device use.device_number = device
assert active_device_number() == device assert active_device_number() == device
else: else:
...@@ -390,7 +396,7 @@ def use(device, ...@@ -390,7 +396,7 @@ def use(device,
cuda_ndarray.cuda_ndarray.CudaNdarray.zeros((2, 3)) cuda_ndarray.cuda_ndarray.CudaNdarray.zeros((2, 3))
use.device_number = active_device_number() use.device_number = active_device_number()
# This is needed to initialize the cublas handle. # This is needed to initialize the cublas handle.
gpu_init(use.device_number) gpu_init(use.device_number, config.lib.cumem)
if test_driver: if test_driver:
import theano.sandbox.cuda.tests.test_driver import theano.sandbox.cuda.tests.test_driver
......
...@@ -71,7 +71,7 @@ void * device_malloc(size_t size) ...@@ -71,7 +71,7 @@ void * device_malloc(size_t size)
} }
///@TODO: thejaswi: link this option to a theano config variable? ///@TODO: thejaswi: link this option to a theano config variable?
static bool g_use_cumem = true; static bool g_use_cumem = false;
static const int g_max_devices = 8; static const int g_max_devices = 8;
int initCumem() { int initCumem() {
static bool cumemInitialized = false; static bool cumemInitialized = false;
...@@ -3093,18 +3093,22 @@ CudaNdarray_ptr_int_size(PyObject* _unused, PyObject* args) ...@@ -3093,18 +3093,22 @@ CudaNdarray_ptr_int_size(PyObject* _unused, PyObject* args)
static int cublas_init(); static int cublas_init();
static void cublas_shutdown(); static void cublas_shutdown();
// Initialize the gpu. // Initialize the gpu.
// Takes one optional parameter, the device number. // Takes two optional parameters, the device number and if we should use cumem.
// If provided, it sets that device to be the active device. // If the device number is provided, it sets that device to be the active device.
// If not provided (usually just to test whether the gpu is available at all), // If not provided (usually just to test whether the gpu is available at all),
// it does not set an active device. // it does not set an active device.
// Raises EnvironmentError or ValueError (as appropriate) if the initialization failed. // Raises EnvironmentError or ValueError (as appropriate) if the initialization failed.
// cumem is treated like a bool. If it converts to 0, don't use cumem. Otherwise, use it.
PyObject * PyObject *
CudaNdarray_gpu_init(PyObject* _unused, PyObject* args) CudaNdarray_gpu_init(PyObject* _unused, PyObject* args)
{ {
int card_nb = 0; int card_nb = 0;
int card_number_provided = 1; int card_number_provided = 1;
int cumem = 0; // 0 False, 1 True
PyArg_ParseTuple(args, "|i", &card_nb); // if we're given something wildly invalid, this will throw a TypeError // if we're given something wildly invalid, this will throw a TypeError
PyArg_ParseTuple(args, "|ii", &card_nb, &cumem);
if(cumem)
g_use_cumem = true;
if(PyTuple_Size(args) == 0) { if(PyTuple_Size(args) == 0) {
card_number_provided = 0; card_number_provided = 0;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论