Merge pull request #5735 from abergeron/disk_cache

Disk cache option for gpuarray

Merge pull request #5735 from abergeron/disk_cache
f38289c9 · Pascal Lamblin · GitHub · 00935985 · e7582189 · f38289c9
--- a/doc/library/config.txt
+++ b/doc/library/config.txt
@@ -431,6 +431,7 @@ import theano and print the config variable, as in:
        automatically to get more memory. But this can cause
        fragmentation, see note above.

+
 .. attribute:: config.gpuarray.sched

    String value: ``'default'``, ``'multi'``, ``'single'``
@@ -462,6 +463,13 @@ import theano and print the config variable, as in:
    performance, check both options.


+.. attribute:: config.gpuarray.cache_path
+
+   Default: ``config.compiledir``/gpuarray_kernels
+
+   Directory to cache pre-compiled kernels for the gpuarray backend.
+
+
 .. attribute:: linker

    String value: ``'c|py'``, ``'py'``, ``'c'``, ``'c|py_nogc'``

--- a/theano/configdefaults.py
+++ b/theano/configdefaults.py
@@ -491,33 +491,6 @@ AddConfigVar(
    EnumStr('warn', 'raise', 'pdb', 'ignore'),
    in_c_key=False)

-
-def safe_no_home(home):
-    """
-    Make sure the user is not attempting to use `config.home`.
-
-    This config option was removed in Thenao 0.5 since it was redundant with
-    `config.base_compiledir`. This filter function ensures people who were
-    setting the location of their compilation directory through `config.home`
-    switch to `config.basecompiledir` instead, by raising an error when
-    `config.home` is used.
-    """
-    if home:
-        raise RuntimeError(
-            'The `config.home` option has been removed and should not be '
-            'used anymore. Please set the `config.base_compiledir` option '
-            'instead (for instance to: %s)' %
-            os.path.join(home, '.theano'))
-    return True
-
-
-AddConfigVar(
-    'home',
-    "This config option was removed in 0.5: do not use it!",
-    ConfigParam('', allow_override=False, filter=safe_no_home),
-    in_c_key=False)
-
-
 AddConfigVar(
    'nocleanup',
    "Suppress the deletion of code files that did not compile cleanly",
@@ -1708,13 +1681,22 @@ def default_compiledir():
 AddConfigVar(
    'compiledir',
    "platform-dependent cache directory for compiled modules",
-
    ConfigParam(
        default_compiledir,
        filter=filter_compiledir,
        allow_override=False),
    in_c_key=False)

+AddConfigVar(
+    'gpuarray.cache_path',
+    'Directory to cache pre-compiled kernels for the gpuarray backend.',
+    ConfigParam(
+        lambda: os.path.join(config.compiledir, 'gpuarray_kernels'),
+        filter=filter_base_compiledir,
+        allow_override=False),
+    in_c_key=False)
+
+
 # Check if there are remaining flags provided by the user through THEANO_FLAGS.
 for key in THEANO_FLAGS_DICT.keys():
    warnings.warn('Theano does not recognise this flag: {0}'.format(key))
--- a/theano/gof/cmodule.py
+++ b/theano/gof/cmodule.py
@@ -750,6 +750,9 @@ class ModuleCache(object):
            if subdirs_elem == 'lock_dir':
                continue
            root = os.path.join(self.dirname, subdirs_elem)
+            # Don't delete the gpuarray kernel cache
+            if root == config.gpuarray.cache_path:
+                continue
            key_pkl = os.path.join(root, 'key.pkl')
            if key_pkl in self.loaded_key_pkl:
                continue

--- a/theano/gpuarray/__init__.py
+++ b/theano/gpuarray/__init__.py
 from __future__ import absolute_import, print_function, division
 import sys
+import os
 import logging
-import sys
 import warnings

 import theano
@@ -31,6 +31,7 @@ from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
 from .basic_ops import as_gpuarray_variable
 from . import fft, dnn, opt, nerv, extra_ops, multinomial, reduction

+
 def transfer(x, target):
    try:
        get_context(target)
@@ -41,7 +42,7 @@ def transfer(x, target):
 register_transfer(transfer)


-def init_dev(dev, name=None):
+def init_dev(dev, name=None, preallocate=None):
    global pygpu_activated
    if not config.cxx:
        raise RuntimeError("The new gpu-backend need a c++ compiler.")
@@ -53,9 +54,13 @@ def init_dev(dev, name=None):
        raise ValueError(
            "Your installed libgpuarray is not in sync, please make sure to have the appropriate version")
    if dev not in init_dev.devmap:
+        if config.gpuarray.cache_path != '':
+            os.environ['GPUARRAY_CACHE_PATH'] = config.gpuarray.cache_path
+        if preallocate is None:
+            preallocate = config.gpuarray.preallocate
        context = pygpu.init(
            dev,
-            disable_alloc_cache=config.gpuarray.preallocate < 0,
+            disable_alloc_cache=preallocate < 0,
            single_stream=config.gpuarray.single_stream,
            sched=config.gpuarray.sched)
        context.dev = dev
@@ -73,14 +78,14 @@ def init_dev(dev, name=None):
                else:
                    print("Can not use cuDNN on context %s: %s" % (name, dnn.dnn_available.msg),
                          file=sys.stderr)
-        if config.gpuarray.preallocate < 0:
+        if preallocate < 0:
            print("Disabling allocation cache on %s" % (dev,))
-        elif config.gpuarray.preallocate > 0:
+        elif preallocate > 0:
            MB = (1024 * 1024)
-            if config.gpuarray.preallocate <= 1:
-                gmem = min(config.gpuarray.preallocate, 0.95) * context.total_gmem
+            if preallocate <= 1:
+                gmem = min(preallocate, 0.95) * context.total_gmem
            else:
-                gmem = config.gpuarray.preallocate * MB
+                gmem = preallocate * MB
            if gmem > context.free_gmem - 50 * MB:
                print(
                    "WARNING: Preallocating too much memory can prevent cudnn and cublas from working properly")
@@ -90,8 +95,8 @@ def init_dev(dev, name=None):
            pygpu.empty((gmem,), dtype='int8', context=context)
            if config.print_active_device:
                print("Preallocating %d/%d Mb (%f) on %s" %
-                      (gmem//MB, context.total_gmem//MB,
-                       gmem/context.total_gmem, dev),
+                      (gmem // MB, context.total_gmem // MB,
+                       gmem / context.total_gmem, dev),
                      file=sys.stderr)

        # Initialise the blas kernels.  We do this after the
@@ -122,7 +127,8 @@ init_dev.devmap = {}
 def use(device,
        force=False,
        default_to_move_computation_to_gpu=True,
-        move_shared_to_gpu=True):
+        move_shared_to_gpu=True,
+        preallocate=None):
    """
    Error and warning about CUDA should be displayed only when this
    function is called. We need to be able to load this module only
@@ -140,17 +146,20 @@ def use(device,
        computations to the gpu.
    move_shared_to_gpu
        If gpu init succeeded, put new shared variables on the gpu.
+    preallocate
+        If specified, use this value for preallocation instead of the
+        `config.gpuarray.preallocate` option.

    """
    if force:
-        if not device.startswith('cuda'):
+        if not (device.startswith('cuda') or device.startswith('opencl')):
            raise Exception("forced the init and bad device provided: " +
                            device)
        else:
            # If we force, the device should not already be initialized.
            assert device not in init_dev.devmap
    if device:
-        init_dev(device)
+        init_dev(device, preallocate=preallocate)
    if default_to_move_computation_to_gpu:
        optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile')
        optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run')
@@ -162,7 +171,7 @@ def use(device,
 if pygpu:
    try:
        if (config.device.startswith('cuda') or
-            config.device.startswith('opencl')):
+                config.device.startswith('opencl')):
            use(config.device)
        elif (config.init_gpu_device.startswith('cuda') or
              config.init_gpu_device.startswith('opencl')):
@@ -183,8 +192,7 @@ if pygpu:

    from .basic_ops import (GpuAlloc, GpuAllocEmpty, GpuContiguous, GpuEye,
                            GpuFromHost, GpuJoin, GpuReshape, GpuSplit,
-                            HostFromGpu)
-    from .basic_ops import host_from_gpu, GpuFromHost
+                            HostFromGpu, host_from_gpu)
    from .elemwise import GpuElemwise
    from .subtensor import (GpuSubtensor, GpuIncSubtensor,
                            GpuAdvancedIncSubtensor1)