提交 4345523c authored 作者: Frederic's avatar Frederic

Add a Theano flag to enable cumem

上级 4ae044bf
...@@ -54,6 +54,12 @@ AddConfigVar('cublas.lib', ...@@ -54,6 +54,12 @@ AddConfigVar('cublas.lib',
"""Name of the cuda blas library for the linker.""", """Name of the cuda blas library for the linker.""",
StrParam('cublas')) StrParam('cublas'))
# New config flag: whether to use the cumem GPU memory allocator.
AddConfigVar('lib.cumem',
"""Do we enable cumem or not.""",
# We should not mix both allocators, so overriding this flag after
# startup is disallowed.
BoolParam(False, allow_override=False),
# Excluded from the C compilation key (in_c_key=False): toggling the
# allocator does not require recompiling generated C code.
in_c_key=False)
# is_nvcc_available called here to initialize global vars in # is_nvcc_available called here to initialize global vars in
# nvcc_compiler module # nvcc_compiler module
nvcc_compiler.is_nvcc_available() nvcc_compiler.is_nvcc_available()
...@@ -377,7 +383,7 @@ def use(device, ...@@ -377,7 +383,7 @@ def use(device,
try: try:
if (device != 'gpu') and not pycuda_init_dev: if (device != 'gpu') and not pycuda_init_dev:
assert isinstance(device, int) assert isinstance(device, int)
gpu_init(device) gpu_init(device, config.lib.cumem)
use.device_number = device use.device_number = device
assert active_device_number() == device assert active_device_number() == device
else: else:
...@@ -390,7 +396,7 @@ def use(device, ...@@ -390,7 +396,7 @@ def use(device,
cuda_ndarray.cuda_ndarray.CudaNdarray.zeros((2, 3)) cuda_ndarray.cuda_ndarray.CudaNdarray.zeros((2, 3))
use.device_number = active_device_number() use.device_number = active_device_number()
# This is needed to initialize the cublas handle. # This is needed to initialize the cublas handle.
gpu_init(use.device_number) gpu_init(use.device_number, config.lib.cumem)
if test_driver: if test_driver:
import theano.sandbox.cuda.tests.test_driver import theano.sandbox.cuda.tests.test_driver
......
...@@ -71,7 +71,7 @@ void * device_malloc(size_t size) ...@@ -71,7 +71,7 @@ void * device_malloc(size_t size)
} }
///@TODO: thejaswi: link this option to a theano config variable? ///@TODO: thejaswi: link this option to a theano config variable?
static bool g_use_cumem = true; static bool g_use_cumem = false;
static const int g_max_devices = 8; static const int g_max_devices = 8;
int initCumem() { int initCumem() {
static bool cumemInitialized = false; static bool cumemInitialized = false;
...@@ -3093,18 +3093,22 @@ CudaNdarray_ptr_int_size(PyObject* _unused, PyObject* args) ...@@ -3093,18 +3093,22 @@ CudaNdarray_ptr_int_size(PyObject* _unused, PyObject* args)
static int cublas_init(); static int cublas_init();
static void cublas_shutdown(); static void cublas_shutdown();
// Initialize the gpu. // Initialize the gpu.
// Takes one optional parameter, the device number. // Takes two optional parameters, the device number and if we should use cumem.
// If provided, it sets that device to be the active device. // If the device number is provided, it sets that device to be the active device.
// If not provided (usually just to test whether the gpu is available at all), // If not provided (usually just to test whether the gpu is available at all),
// it does not set an active device. // it does not set an active device.
// Raises EnvironmentError or ValueError (as appropriate) if the initialization failed. // Raises EnvironmentError or ValueError (as appropriate) if the initialization failed.
// cumem is treated like a bool. If it converts to 0, don't use cumem. Otherwise, use it.
PyObject * PyObject *
CudaNdarray_gpu_init(PyObject* _unused, PyObject* args) CudaNdarray_gpu_init(PyObject* _unused, PyObject* args)
{ {
int card_nb = 0; int card_nb = 0;
int card_number_provided = 1; int card_number_provided = 1;
int cumem = 0; // 0 False, 1 True
PyArg_ParseTuple(args, "|i", &card_nb); // if we're given something wildly invalid, this will throw a TypeError // if we're given something wildly invalid, this will throw a TypeError
PyArg_ParseTuple(args, "|ii", &card_nb, &cumem);
if(cumem)
g_use_cumem = true;
if(PyTuple_Size(args) == 0) { if(PyTuple_Size(args) == 0) {
card_number_provided = 0; card_number_provided = 0;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论