Commit 26445969 authored by abergeron, committed by GitHub

Merge pull request #4821 from nouiz/gpuarray_sched

Add the Theano flag gpuarray.sched
@@ -487,6 +487,21 @@ import theano and print the config variable, as in:
automatically to get more memory. But this can cause
fragmentation, see note above.
.. attribute:: config.gpuarray.sched
String value: ``'default'``, ``'multi'``, ``'single'``
Default: ``'default'``
Control the stream mode of contexts.
The sched parameter passed to pygpu for context creation. With
CUDA, using ``'multi'`` means using the parameter
``cudaDeviceScheduleYield``. This is useful to lower the CPU overhead
when waiting for the GPU. One user found that it sped up their other
processes, which were doing data augmentation.
.. attribute:: config.gpuarray.single_stream
Boolean value
......
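For context, the new flag is set like any other Theano flag, either via the environment or in ``.theanorc``. A minimal usage sketch (``train.py`` is a placeholder script name, not part of this change):

```
THEANO_FLAGS='device=cuda0,gpuarray.sched=multi' python train.py
```

The equivalent ``.theanorc`` entry would be ``sched = multi`` under the ``[gpuarray]`` section.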
@@ -242,6 +242,15 @@ AddConfigVar('gpuarray.preallocate',
FloatParam(0),
in_c_key=False)
AddConfigVar('gpuarray.sched',
"""The sched parameter passed to pygpu for context creation.
With CUDA, using "multi" is equivalent to using the parameter
cudaDeviceScheduleYield. This is useful to lower the
CPU overhead when waiting for the GPU. One user found that it
sped up their other processes, which were doing data augmentation.
""",
EnumStr("default", "multi", "single"))
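The `EnumStr("default", "multi", "single")` call above registers the allowed values for the flag, with the first entry serving as the default. As a hedged sketch (this is not Theano's actual `EnumStr` implementation, just an illustration of the validation behavior):

```python
class EnumStr:
    """Sketch of an enumerated string config type: the first
    option passed is the default, and assigning a value outside
    the allowed set raises an error."""

    def __init__(self, *options):
        self.options = options
        self.value = options[0]  # first entry is the default

    def set(self, value):
        if value not in self.options:
            raise ValueError("invalid value %r; expected one of %r"
                             % (value, self.options))
        self.value = value


sched = EnumStr("default", "multi", "single")
sched.set("multi")  # accepted; sched.set("fast") would raise
```

This is why leaving the flag unset behaves as before: the first option, `"default"`, is used unless the user explicitly overrides it.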
AddConfigVar('gpuarray.single_stream',
"""
If your computations are mostly lots of small elements,
......
@@ -63,7 +63,8 @@ def init_dev(dev, name=None):
if dev not in init_dev.devmap:
ctx = pygpu.init(dev,
disable_alloc_cache=config.gpuarray.preallocate < 0,
single_stream=config.gpuarray.single_stream)
single_stream=config.gpuarray.single_stream,
sched=config.gpuarray.sched)
init_dev.devmap[dev] = ctx
if config.gpuarray.preallocate > 0:
MB = (1024 * 1024)
......
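The `init_dev` change above simply threads the new flag into `pygpu.init` while keeping the existing one-context-per-device cache. A minimal Python sketch of that caching pattern, with `make_context` standing in for `pygpu.init` (both names and the dict-based "context" here are illustrative, not Theano's real objects):

```python
# Cache of already-initialized devices, mirroring init_dev.devmap.
_devmap = {}


def make_context(dev, sched="default", single_stream=False):
    """Stand-in for pygpu.init(): records the creation options."""
    return {"dev": dev, "sched": sched, "single_stream": single_stream}


def init_dev(dev, **kwargs):
    # Create the context only on first use; later calls for the
    # same device reuse the cached context and ignore new kwargs.
    if dev not in _devmap:
        _devmap[dev] = make_context(dev, **kwargs)
    return _devmap[dev]
```

One consequence of this pattern, visible in the real code too: the sched value only matters the first time a device is initialized, since subsequent calls return the cached context.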