提交 755f3648 authored 作者: Pascal Lamblin's avatar Pascal Lamblin 提交者: GitHub

Merge pull request #4628 from abergeron/single_stream_flag

Single stream flag
...@@ -487,6 +487,21 @@ import theano and print the config variable, as in:
automatically to get more memory. But this can cause
fragmentation, see note above.
.. attribute:: config.gpuarray.single_stream
Boolean value
Default: ``True``
Control the stream mode of contexts.
If your computations are mostly lots of small elements, using
single-stream will avoid the synchronization overhead and usually
be faster. For larger elements it does not make a difference yet.
In the future when true multi-stream is enabled in libgpuarray,
this may change. If you want to make sure to have optimal
performance, check both options.
.. attribute:: linker
......
...@@ -242,6 +242,19 @@ AddConfigVar('gpuarray.preallocate',
FloatParam(0),
in_c_key=False)
# Register the new ``gpuarray.single_stream`` Theano config flag.
# When enabled, GPU contexts run in single-stream mode, which skips
# inter-stream synchronization overhead (helps workloads made of many
# small kernels). Consumed in init_dev() when creating the pygpu context.
AddConfigVar('gpuarray.single_stream',
"""
If your computations are mostly lots of small elements,
using single-stream will avoid the synchronization
overhead and usually be faster. For larger elements it
does not make a difference yet. In the future when true
multi-stream is enabled in libgpuarray, this may change.
If you want to make sure to have optimal performance,
check both options.
""",
BoolParam(True),  # boolean option, defaults to single-stream mode
in_c_key=False)  # does not affect the C compilation cache key
def safe_no_dnn_workmem(workmem):
"""
......
...@@ -62,7 +62,8 @@ def init_dev(dev, name=None):
global pygpu_activated
if dev not in init_dev.devmap:
ctx = pygpu.init(dev,
-                disable_alloc_cache=config.gpuarray.preallocate < 0)
+                disable_alloc_cache=config.gpuarray.preallocate < 0,
+                single_stream=config.gpuarray.single_stream)
init_dev.devmap[dev] = ctx
if config.gpuarray.preallocate > 0:
MB = (1024 * 1024)
......
...@@ -11,7 +11,7 @@ from theano.tensor import (DimShuffle, get_scalar_constant_value,
from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty
from .elemwise import GpuDimShuffle, GpuElemwise
-_one = scal.constant(numpy.asarray(1.0, dtype='float64'))
+_one = scal.constant(numpy.asarray(1.0, dtype='float32'))
def grab_cpu_scalar(v, nd):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论