提交 9df6ce4e authored 作者: Pascal Lamblin's avatar Pascal Lamblin 提交者: GitHub

Merge pull request #6085 from nouiz/deterministic

Add deterministic={default,more} flag
...@@ -179,6 +179,19 @@ import theano and print the config variable, as in:
When creating a TensorVariable with dtype float64, what should be done?
This is useful to help find upcast to float64 in user code.
.. attribute:: deterministic
String value: either ``'default'``, ``'more'``
Default: ``'default'``
If ``more``, sometimes we will select some implementations that
are more deterministic, but slower. In particular, on the GPU,
we will avoid using AtomicAdd. Sometimes we will still use a
non-deterministic implementation, e.g. when we do not have a GPU
implementation that is deterministic. Also see the dnn.conv.algo*
flags to cover more cases.
.. attribute:: allow_gc
Bool value: either ``True`` or ``False``
...@@ -194,6 +207,9 @@ import theano and print the config variable, as in:
significant speed up on functions with many ops that are fast to
execute, but this increases Theano's memory usage.
.. note:: if :attr:`config.gpuarray.preallocate` is the default value
or not disabled (-1), this is not useful anymore on the GPU.
.. attribute:: config.scan.allow_output_prealloc
Bool value, either ``True`` or ``False``
......
...@@ -79,6 +79,17 @@ AddConfigVar('int_division',
EnumStr('int', 'raise', 'floatX'),
in_c_key=False)
# Register the 'deterministic' config flag: a trade-off switch between
# reproducible results ('more') and speed ('default', which may pick
# non-deterministic GPU kernels such as AtomicAdd-based updates).
# Fixes grammar/spelling in the user-visible help text
# ("implementation that are" -> "implementations that",
# "implementaion" -> "implementation").
AddConfigVar('deterministic',
             "If `more`, sometimes we will select some implementations that "
             "are more deterministic, but slower. In particular, on the GPU, "
             "we will avoid using AtomicAdd. Sometimes we will still use a "
             "non-deterministic implementation, e.g. when we do not have a GPU "
             "implementation that is deterministic. Also see "
             "the dnn.conv.algo* flags to cover more cases.",
             EnumStr('default', 'more'),
             # the flag changes which ops are chosen, not generated C code,
             # so it must not be part of the compilation cache key
             in_c_key=False,
             )
# gpu means let the driver select the gpu. Needed in case of gpu in
# exclusive mode.
# gpuX means use the gpu number X.
......
...@@ -1077,14 +1077,16 @@ def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
    set_instead_of_inc = op.set_instead_of_inc
    compute_capability = int(context.bin_id[-2])
    if (compute_capability >= 2 and x.ndim == 1 and y.ndim == 0 and
            config.deterministic == 'default'):
        x = x.dimshuffle(0, 'x')
        y = y.dimshuffle('x', 'x')
        ret = GpuAdvancedIncSubtensor1_dev20(
            set_instead_of_inc=set_instead_of_inc)(x, y, ilist)
        ret = GpuDimShuffle(ret.type.broadcastable, [0])(ret)
        return ret
    elif (compute_capability < 2 or x.ndim != 2 or y.ndim != 2 or
          config.deterministic == 'more'):
        return GpuAdvancedIncSubtensor1(
            set_instead_of_inc=set_instead_of_inc)
    else:
......
...@@ -121,6 +121,28 @@ def test_advinc_subtensor1_dtype():
    assert np.allclose(rval, rep)
@theano.configparser.change_flags(deterministic='more')
def test_deterministic_flag():
    # Under deterministic='more', advanced_inc_subtensor1 on the GPU must be
    # lowered to the deterministic GpuAdvancedIncSubtensor1 op (never the
    # AtomicAdd-based dev20 variant), and the numerical result must match
    # the plain NumPy increment.
    shape = (3, 4)
    for x_dtype, y_dtype in [('float32', 'int8')]:
        x_data = np.arange(np.prod(shape), dtype=x_dtype).reshape(shape) + 1
        y_data = np.full((2,) + shape[1:], 10, dtype=y_dtype)
        x = gpuarray_shared_constructor(x_data, name='x')
        y = tensor.tensor(dtype=y_data.dtype,
                          broadcastable=(False,) * y_data.ndim,
                          name='y')
        out = tensor.advanced_inc_subtensor1(x, y, [0, 2])
        fn = theano.function([y], out, mode=mode_with_gpu)
        # exactly one deterministic inc-subtensor op in the compiled graph
        n_det_ops = sum(isinstance(node.op, GpuAdvancedIncSubtensor1)
                        for node in fn.maker.fgraph.toposort())
        assert n_det_ops == 1
        result = fn(y_data)
        expected = x_data.copy()
        expected[[0, 2]] += y_data
        assert np.allclose(result, expected)
def test_advinc_subtensor1_vector_scalar():
    # Test the case where x is a vector and y a scalar
    shp = (3,)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论