提交 f512a560 authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #4488 from abergeron/disable_preallocate

Make negative values for gpuarray.preallocate disable the allocation cache completely.
...@@ -442,6 +442,42 @@ import theano and print the config variable, as in: ...@@ -442,6 +442,42 @@ import theano and print the config variable, as in:
automatically to get more memory. But this can cause automatically to get more memory. But this can cause
fragmentation, see note above. fragmentation, see note above.
.. attribute:: config.gpuarray.preallocate
Float value
Default: 0
Controls the preallocation of memory with the gpuarray backend.
The value represents the start size (either in MB or the fraction
of total GPU memory) of the memory pool. If more memory is needed,
Theano will try to obtain more, but this can cause memory
fragmentation.
A negative value will completely disable the allocation cache.
This can have a severe impact on performance and so should not be
done outside of debugging.
* < 0: disabled
* 0 <= N <= 1: use this fraction of the total GPU memory (clipped to .95 for driver memory).
* > 1: use this number in megabytes (MB) of memory.
.. note::
This could cause memory fragmentation. So if you have a
memory error while using CNMeM, try to allocate more memory at
the start or disable it. If you try this, report your result
on :ref:`theano-dev`.
.. note::
The clipping at 95% can be bypassed by specifying the exact
number of megabytes. If more than 95% are needed, it will try
automatically to get more memory. But this can cause
fragmentation, see note above.
.. attribute:: linker .. attribute:: linker
String value: 'c|py', 'py', 'c', 'c|py_nogc' String value: 'c|py', 'py', 'c', 'c|py_nogc'
......
...@@ -235,11 +235,12 @@ AddConfigVar('gpuarray.sync', ...@@ -235,11 +235,12 @@ AddConfigVar('gpuarray.sync',
in_c_key=True) in_c_key=True)
AddConfigVar('gpuarray.preallocate', AddConfigVar('gpuarray.preallocate',
"""If 0 it doesn't do anything. If between 0 and 1 it """If negative it disables the allocation cache. If
will preallocate that fraction of the total GPU memory. between 0 and 1 it enables the allocation cache and
If 1 or greater it will preallocate that amount of memory preallocates that fraction of the total GPU memory. If 1
(in megabytes).""", or greater it will preallocate that amount of memory (in
FloatParam(0, lambda i: i >= 0), megabytes).""",
FloatParam(0),
in_c_key=False) in_c_key=False)
......
...@@ -51,40 +51,43 @@ def init_dev(dev, name=None): ...@@ -51,40 +51,43 @@ def init_dev(dev, name=None):
"Please update libgpuarray/pygpu.") "Please update libgpuarray/pygpu.")
global pygpu_activated global pygpu_activated
if dev not in init_dev.devmap: if dev not in init_dev.devmap:
ctx = pygpu.init(dev) ctx = pygpu.init(dev,
disable_alloc_cache=config.gpuarray.preallocate < 0)
init_dev.devmap[dev] = ctx init_dev.devmap[dev] = ctx
if config.gpuarray.preallocate != 0: if config.gpuarray.preallocate > 0:
if config.gpuarray.preallocate < 1: MB = (1024 * 1024)
gmem = min(config.gpuarray.preallocate, 0.98) * ctx.total_gmem if config.gpuarray.preallocate <= 1:
gmem = min(config.gpuarray.preallocate, 0.95) * ctx.total_gmem
else: else:
gmem = config.gpuarray.preallocate * (1024*1024) gmem = config.gpuarray.preallocate * MB
# This will allocate and immediatly free an object of size gmem # This will allocate and immediatly free an object of size gmem
# which will reserve that amount of memory on the GPU. # which will reserve that amount of memory on the GPU.
pygpu.empty((gmem,), dtype='int8', context=ctx) pygpu.empty((gmem,), dtype='int8', context=ctx)
if config.print_active_device:
print("Preallocating %d/%d Mb (%f) on %s" %
(gmem//MB, ctx.total_gmem//MB, gmem/ctx.total_gmem, dev),
file=sys.stderr)
context = init_dev.devmap[dev] context = init_dev.devmap[dev]
# This will map the context name to the real context object. # This will map the context name to the real context object.
reg_context(name, context) reg_context(name, context)
pygpu_activated = True
if config.print_active_device: if config.print_active_device:
warn = None print("Mapped name %s to device %s: %s" %
cudnn_version = "" (name, dev, context.devname),
if dev.startswith('cuda'):
cudnn_version = " (cuDNN not available)"
try:
cudnn_version = dnn.version()
# 5100 should not print warning with cudnn 5 final.
if cudnn_version > 5100:
warn = ("Your cuDNN version is more recent than Theano."
" If you see problems, try updating Theano or"
" downgrading cuDNN to version 5.")
cudnn_version = " (cuDNN version %s)" % cudnn_version
except Exception:
cudnn_version = dnn.dnn_present.msg
print("Mapped name %s to device %s: %s%s" % (
name, dev, context.devname, cudnn_version),
file=sys.stderr) file=sys.stderr)
if warn: pygpu_activated = True
warnings.warn(warn) if dev.startswith('cuda'):
try:
cudnn_version = dnn.version()
# 5100 should not print warning with cudnn 5 final.
if cudnn_version > 5100:
warnings.warn("Your cuDNN version is more recent than Theano."
" If you see problems, try updating Theano or"
" downgrading cuDNN to version 5.")
if config.print_active_device:
print("Using cuDNN version %d on context %s" %
(cudnn_version, name), file=sys.stderr)
except Exception:
pass
# This maps things like 'cuda0' to the context object on that device. # This maps things like 'cuda0' to the context object on that device.
init_dev.devmap = {} init_dev.devmap = {}
......
...@@ -463,7 +463,7 @@ class GpuArrayType(Type): ...@@ -463,7 +463,7 @@ class GpuArrayType(Type):
ver = pygpu.gpuarray.api_version() ver = pygpu.gpuarray.api_version()
# we only use the major version since the minor revision are # we only use the major version since the minor revision are
# API-compatible. # API-compatible.
return (1, ver[0]) return (2, ver[0])
class _operators(_tensor_py_operators): class _operators(_tensor_py_operators):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论