提交 9df6ce4e authored 作者: Pascal Lamblin's avatar Pascal Lamblin 提交者: GitHub

Merge pull request #6085 from nouiz/deterministic

Add deterministic={default,more} flag
...@@ -179,6 +179,19 @@ import theano and print the config variable, as in:
When creating a TensorVariable with dtype float64, what should be done?
This is useful to help find upcast to float64 in user code.
.. attribute:: deterministic
String value: either ``'default'``, ``'more'``
Default: ``'default'``
If ``more``, sometimes we will select some implementations that
are more deterministic, but slower. In particular, on the GPU,
we will avoid using AtomicAdd. Sometimes we will still use a
non-deterministic implementation, e.g. when we do not have a GPU
implementation that is deterministic. Also see the dnn.conv.algo*
flags to cover more cases.
.. attribute:: allow_gc
Bool value: either ``True`` or ``False``
...@@ -194,6 +207,9 @@ import theano and print the config variable, as in:
significant speed up on functions with many ops that are fast to
execute, but this increases Theano's memory usage.
.. note:: if :attr:`config.gpuarray.preallocate` is the default value
or not disabled (-1), this is not useful anymore on the GPU.
.. attribute:: config.scan.allow_output_prealloc
Bool value, either ``True`` or ``False``
......
...@@ -79,6 +79,17 @@ AddConfigVar('int_division',
EnumStr('int', 'raise', 'floatX'),
in_c_key=False)
# Register the 'deterministic' config flag: a trade-off switch between
# reproducible results ('more') and speed ('default', which may pick
# non-deterministic GPU kernels such as AtomicAdd-based updates).
# Fixes grammar/spelling in the user-visible help text
# ("implementation that are" -> "implementations that",
# "implementaion" -> "implementation").
AddConfigVar('deterministic',
             "If `more`, sometimes we will select some implementations that "
             "are more deterministic, but slower. In particular, on the GPU, "
             "we will avoid using AtomicAdd. Sometimes we will still use a "
             "non-deterministic implementation, e.g. when we do not have a GPU "
             "implementation that is deterministic. Also see "
             "the dnn.conv.algo* flags to cover more cases.",
             EnumStr('default', 'more'),
             # the flag changes which ops are chosen, not generated C code,
             # so it must not be part of the compilation cache key
             in_c_key=False,
             )
# gpu means let the driver select the gpu. Needed in case of gpu in
# exclusive mode.
# gpuX means use the gpu number X.
......
...@@ -1077,14 +1077,16 @@ def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
    set_instead_of_inc = op.set_instead_of_inc
    compute_capability = int(context.bin_id[-2])
    if (compute_capability >= 2 and x.ndim == 1 and y.ndim == 0 and
            config.deterministic == 'default'):
        x = x.dimshuffle(0, 'x')
        y = y.dimshuffle('x', 'x')
        ret = GpuAdvancedIncSubtensor1_dev20(
            set_instead_of_inc=set_instead_of_inc)(x, y, ilist)
        ret = GpuDimShuffle(ret.type.broadcastable, [0])(ret)
        return ret
    elif (compute_capability < 2 or x.ndim != 2 or y.ndim != 2 or
          config.deterministic == 'more'):
        return GpuAdvancedIncSubtensor1(
            set_instead_of_inc=set_instead_of_inc)
    else:
......
...@@ -121,6 +121,28 @@ def test_advinc_subtensor1_dtype():
    assert np.allclose(rval, rep)
@theano.configparser.change_flags(deterministic='more')
def test_deterministic_flag():
    # Under deterministic='more', advanced_inc_subtensor1 on the GPU must be
    # lowered to the deterministic GpuAdvancedIncSubtensor1 op (never the
    # AtomicAdd-based dev20 variant), and the numerical result must match
    # the plain NumPy increment.
    shape = (3, 4)
    for x_dtype, y_dtype in [('float32', 'int8')]:
        x_data = np.arange(np.prod(shape), dtype=x_dtype).reshape(shape) + 1
        y_data = np.full((2,) + shape[1:], 10, dtype=y_dtype)
        x = gpuarray_shared_constructor(x_data, name='x')
        y = tensor.tensor(dtype=y_data.dtype,
                          broadcastable=(False,) * y_data.ndim,
                          name='y')
        out = tensor.advanced_inc_subtensor1(x, y, [0, 2])
        fn = theano.function([y], out, mode=mode_with_gpu)
        # exactly one deterministic inc-subtensor op in the compiled graph
        n_det_ops = sum(isinstance(node.op, GpuAdvancedIncSubtensor1)
                        for node in fn.maker.fgraph.toposort())
        assert n_det_ops == 1
        result = fn(y_data)
        expected = x_data.copy()
        expected[[0, 2]] += y_data
        assert np.allclose(result, expected)
def test_advinc_subtensor1_vector_scalar():
    # Test the case where x is a vector and y a scalar
    shp = (3,)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论