提交 ed1fca7a authored 作者: Frederic's avatar Frederic

update lib.cnmem to be a float with new definition

上级 840f1011
......@@ -14,7 +14,7 @@ from theano.gof import EquilibriumDB, SequenceDB
from theano.gof.cmodule import get_lib_extension
from theano.gof.compilelock import get_lock, release_lock
from theano.configparser import (
config, AddConfigVar, BoolParam, IntParam, StrParam)
config, AddConfigVar, BoolParam, FloatParam, StrParam)
from . import nvcc_compiler
# ignore_newtrees is to speed the optimization as this is the pattern
......@@ -56,16 +56,18 @@ AddConfigVar('cublas.lib',
StrParam('cublas'))
AddConfigVar('lib.cnmem',
"""Do we enable CNMeM or not (a faster memory allocator).
"""Do we enable CNMeM or not (a faster CUDA memory allocator).
The number (in MB) represent the start size of the memory pool.
The parameter represents the start size (in MB or % of
total GPU memory) of the memory pool.
0: not enabled.
-1: use half GPU memory.
>0: use that number of MB of memory.""",
0 < N <= 1: % of the total GPU memory (clipped to .985 for driver memory)
> 1: use that number of MB of memory.
""",
# We should not mix both allocator, so we can't override
# BoolParam(False, allow_override=False),
IntParam(0, lambda i: i >= 0 or i == -1, allow_override=False),
FloatParam(0, lambda i: i >= 0, allow_override=False),
in_c_key=False)
# is_nvcc_available called here to initialize global vars in
......
......@@ -3141,9 +3141,9 @@ CudaNdarray_gpu_init(PyObject* _unused, PyObject* args)
{
int card_nb = 0;
int card_number_provided = 1;
int cnmem = 0; // start qt memory in MB.
float cnmem = 0; // Theano flag lib.cnmem
// if we're given something wildly invalid, this will throw a TypeError
PyArg_ParseTuple(args, "|ii", &card_nb, &cnmem);
PyArg_ParseTuple(args, "|if", &card_nb, &cnmem);
if(cnmem)
g_use_cnmem = true;
......@@ -3202,13 +3202,27 @@ CudaNdarray_gpu_init(PyObject* _unused, PyObject* args)
}
if(card_number_provided && g_use_cnmem) {
size_t mem = 0;
if (cnmem > 0)
if (cnmem > 1)
mem = cnmem * 1024 * 1024;
else if (cnmem != -1){
return PyErr_Format(
PyExc_EnvironmentError,
"CNMeM init: The config flag must be 0 (disabled),"
" -1: use half the GPU memory, > 0: that memory in MB.");
else{
// Clip to 98.5% to leave memory for the driver.
if (cnmem > .985){
cnmem = .985;
}
size_t free = 0, total = 0;
cudaError_t err = cudaMemGetInfo(&free, &total);
if (err != cudaSuccess){
// Clear the error flag, cudaMemGetInfo doesn't do it.
// Currently this returns the same thing as err, but if in future
// it returns something else I still don't see why we should ignore
// it. All we want to do here is reset the flag.
cudaGetLastError();
PyErr_Format(PyExc_RuntimeError,
"Error while getting memory info about the gpu: %s",
cudaGetErrorString(err));
return NULL;
}
mem = total * cnmem;
}
if(initCnmem(card_number_provided, card_nb, mem) == -1){
return NULL;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论