Commit c817315b authored by Pascal Lamblin

Merge pull request #2657 from nouiz/test

Fix random failure in test due to rounding change.
@@ -27,8 +27,7 @@
 As of October 21st, 2014, the default GPU image convolution
 changed: By default, if :ref:`cuDNN <libdoc_cuda_dnn>`
 is available, we will use it, otherwise we will fall back to using the
-gemm version (slower than cuDNN in most cases, uses more memory, but
-faster than the legacy version we used before).
+gemm version (slower than cuDNN in most cases and uses more memory).
 Both cuDNN and the gemm version can be disabled using the Theano flags
 ``optimizer_excluding=conv_dnn`` and ``optimizer_excluding=conv_gemm``,
...
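As a minimal sketch of using those flags, one can set them through the ``THEANO_FLAGS`` environment variable before importing Theano (the flag names come from the docs above; joining multiple excluded tags with a colon is how ``optimizer_excluding`` accepts several tags):

```python
import os

# Illustrative only: exclude both GPU convolution paths. THEANO_FLAGS must
# be set before `import theano`; several excluded tags are colon-separated.
os.environ["THEANO_FLAGS"] = "optimizer_excluding=conv_dnn:conv_gemm"
print(os.environ["THEANO_FLAGS"])
```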
@@ -23,6 +23,9 @@ gpu_seqopt = SequenceDB()
 def register_opt(*tags, **kwargs):
+    if any([not isinstance(t, str) for t in tags]):
+        raise RuntimeError("Bad call to register_opt."
+                           " All tags must be strings.", tags)
     def f(local_opt):
         name = (kwargs and kwargs.pop('name')) or local_opt.__name__
         gpu_optimizer.register(name, local_opt, 'fast_run', 'fast_compile',
...
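The guard added above catches a common decorator mistake: using ``@register_opt`` without parentheses passes the optimizer function itself as a "tag". A standalone sketch (the simplified ``register_opt`` below is illustrative, not the real GPU optimizer registry):

```python
def register_opt(*tags, **kwargs):
    # Same guard as the patch: all positional arguments must be tag strings.
    if any([not isinstance(t, str) for t in tags]):
        raise RuntimeError("Bad call to register_opt."
                           " All tags must be strings.", tags)

    def f(local_opt):
        # The real version registers local_opt with gpu_optimizer;
        # here we just return it unchanged.
        return local_opt
    return f

# Forgetting the parentheses makes the function itself a "tag", which the
# new check turns into an immediate, explicit error:
try:
    @register_opt          # wrong: should be @register_opt()
    def my_opt(node):
        return None
except RuntimeError as e:
    print("caught:", e.args[0])
```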
@@ -406,6 +406,22 @@ class GpuCumsum(CumsumOp, GpuOp):
         return code
 
+
+def values_eq_approx_high_tol(a, b):
+    """This function is needed so that DebugMode does not raise useless
+    errors caused by rounding.
+
+    This happens with big input sizes, due to a change in the order of
+    operations.
+    """
+    rtol = None
+    if a.size > 100000:
+        # For float32 the default rtol is 1e-5
+        rtol = 5e-5
+    return CudaNdarrayType.values_eq_approx(a, b, rtol=rtol)
+
+
+@register_gpu_opt()
 @local_optimizer([CumsumOp])
 def use_gpu_cumsum(node):
     if type(node.op) is CumsumOp \
@@ -427,8 +443,6 @@ def use_gpu_cumsum(node):
     # ``gpu_cumsum`` assumes the array has been flattened if needed.
     if axis is None:
         axis = 0
-    return [host_from_gpu(GpuCumsum(axis)(x))]
-
-if cuda_available:
-    register_gpu_opt()(use_gpu_cumsum)
+    ret = host_from_gpu(GpuCumsum(axis)(x))
+    ret.values_eq_approx = values_eq_approx_high_tol
+    return [ret]
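The relaxed tolerance can be sketched with NumPy as a stand-in for ``CudaNdarrayType.values_eq_approx`` (the 100000-element threshold and the 5e-5 rtol mirror the patch; using ``np.allclose`` for the comparison is an assumption about its semantics):

```python
import numpy as np

def values_eq_approx_high_tol(a, b):
    # Mirror of the patch: float32's default rtol is 1e-5; for arrays with
    # more than 100000 elements, relax it to 5e-5 so that differences caused
    # by a different order of summation do not trip DebugMode.
    rtol = 1e-5
    if a.size > 100000:
        rtol = 5e-5
    return np.allclose(a, b, rtol=rtol)

# A relative difference of 3e-5 fails the strict check on a small array
# but passes the relaxed one on a large array.
small = np.ones(10, dtype="float32")
big = np.ones(100001, dtype="float32")
print(values_eq_approx_high_tol(small, small * np.float32(1 + 3e-5)))  # False
print(values_eq_approx_high_tol(big, big * np.float32(1 + 3e-5)))      # True
```

This is why the test failed only for big inputs: float32 addition is not associative, so the GPU cumsum's different operation order perturbs the low-order bits enough to exceed the default tolerance once the array is long.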