提交 4345523c authored 作者: Frederic's avatar Frederic

Add a Theano flag to enable cumem

上级 4ae044bf
......@@ -54,6 +54,12 @@ AddConfigVar('cublas.lib',
"""Name of the cuda blas library for the linker.""",
StrParam('cublas'))
AddConfigVar('lib.cumem',
"""Do we enable cumem or not.""",
# We should not mix both allocators, so this flag can't be overridden
BoolParam(False, allow_override=False),
in_c_key=False)
# is_nvcc_available called here to initialize global vars in
# nvcc_compiler module
nvcc_compiler.is_nvcc_available()
......@@ -377,7 +383,7 @@ def use(device,
try:
if (device != 'gpu') and not pycuda_init_dev:
assert isinstance(device, int)
gpu_init(device)
gpu_init(device, config.lib.cumem)
use.device_number = device
assert active_device_number() == device
else:
......@@ -390,7 +396,7 @@ def use(device,
cuda_ndarray.cuda_ndarray.CudaNdarray.zeros((2, 3))
use.device_number = active_device_number()
# This is needed to initialize the cublas handle.
gpu_init(use.device_number)
gpu_init(use.device_number, config.lib.cumem)
if test_driver:
import theano.sandbox.cuda.tests.test_driver
......
......@@ -71,7 +71,7 @@ void * device_malloc(size_t size)
}
///@TODO: thejaswi: link this option to a theano config variable?
static bool g_use_cumem = true;
static bool g_use_cumem = false;
static const int g_max_devices = 8;
int initCumem() {
static bool cumemInitialized = false;
......@@ -3093,18 +3093,22 @@ CudaNdarray_ptr_int_size(PyObject* _unused, PyObject* args)
static int cublas_init();
static void cublas_shutdown();
// Initialize the gpu.
// Takes one optional parameter, the device number.
// If provided, it sets that device to be the active device.
// Takes two optional parameters, the device number and if we should use cumem.
// If the device number is provided, it sets that device to be the active device.
// If not provided (usually just to test whether the gpu is available at all),
// it does not set an active device.
// Raises EnvironmentError or ValueError (as appropriate) if the initialization failed.
// cumem is treated like a bool. If converted to 0, don't use cumem. Otherwise, use it.
PyObject *
CudaNdarray_gpu_init(PyObject* _unused, PyObject* args)
{
int card_nb = 0;
int card_number_provided = 1;
PyArg_ParseTuple(args, "|i", &card_nb); // if we're given something wildly invalid, this will throw a TypeError
int cumem = 0; // 0 False, 1 True
// if we're given something wildly invalid, this will throw a TypeError
PyArg_ParseTuple(args, "|ii", &card_nb, &cumem);
if(cumem)
g_use_cumem = true;
if(PyTuple_Size(args) == 0) {
card_number_provided = 0;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论