Merge pull request #6540 from nouiz/tests

Fix tests in DebugMode and make them less verbose

Merge pull request #6540 from nouiz/tests
f62fd394 · Frédéric Bastien · GitHub · 58689970 · 578573fc · f62fd394
--- a/theano/compile/tests/test_monitormode.py
+++ b/theano/compile/tests/test_monitormode.py
 from __future__ import absolute_import, print_function, division
+import sys
 import numpy as np
+from six.moves import StringIO
 import theano
@@ -23,7 +26,12 @@ def test_detect_nan():
    f = theano.function([x], [theano.tensor.log(x) * x],
                        mode=theano.compile.MonitorMode(
                            post_func=detect_nan))
-    f(0)  # log(0) * 0 = -inf * 0 = NaN
+    try:
+        old_stdout = sys.stdout
+        sys.stdout = StringIO()
+        f(0)  # log(0) * 0 = -inf * 0 = NaN
+    finally:
+        sys.stdout = old_stdout
    assert nan_detected[0]
@@ -49,7 +57,12 @@ def test_optimizer():
                        mode=mode)
    # Test that the fusion wasn't done
    assert len(f.maker.fgraph.apply_nodes) == 2
-    f(0)  # log(0) * 0 = -inf * 0 = NaN
+    try:
+        old_stdout = sys.stdout
+        sys.stdout = StringIO()
+        f(0)  # log(0) * 0 = -inf * 0 = NaN
+    finally:
+        sys.stdout = old_stdout
    # Test that we still detect the nan
    assert nan_detected[0]
@@ -83,7 +96,12 @@ def test_not_inplace():
    # Test that the fusion wasn't done
    assert len(f.maker.fgraph.apply_nodes) == 5
    assert not f.maker.fgraph.toposort()[-1].op.destroy_map
-    f([0, 0])  # log(0) * 0 = -inf * 0 = NaN
+    try:
+        old_stdout = sys.stdout
+        sys.stdout = StringIO()
+        f([0, 0])  # log(0) * 0 = -inf * 0 = NaN
+    finally:
+        sys.stdout = old_stdout
    # Test that we still detect the nan
    assert nan_detected[0]
--- a/theano/gof/opt.py
+++ b/theano/gof/opt.py
@@ -37,6 +37,15 @@ def _list_of_nodes(fgraph):
    return list(graph.io_toposort(fgraph.inputs, fgraph.outputs))
+class LocalMetaOptimizerSkipAssertionError(AssertionError):
+    """This is an AssertionError, but instead of having the
+    LocalMetaOptimizer print the error, it just skip that
+    compilation.
+    """
+    pass
 class Optimizer(object):
    """
@@ -1130,6 +1139,10 @@ class LocalMetaOptimizer(LocalOptimizer):
    Base class for meta-optimizers that try a set of LocalOptimizers
    to replace a node and choose the one that executes the fastest.
+    If the error LocalMetaOptimizerSkipAssertionError is raised during
+    compilation, we will skip that function compilation and not print
+    the error.
    """
    def __init__(self):
@@ -1194,6 +1207,8 @@ class LocalMetaOptimizer(LocalOptimizer):
                                         on_unused_input='ignore')
                    fn.trust_input = True
                    timing = min(self.time_call(fn) for _ in range(2))
+                except LocalMetaOptimizerSkipAssertionError:
+                    continue
                except Exception as e:
                    if self.verbose > 0:
                        print("* %s: exception" % opt, e)

--- a/theano/gof/utils.py
+++ b/theano/gof/utils.py
@@ -128,8 +128,10 @@ def get_variable_trace_string(v):
            traceback.print_list(v.tag.trace, sio)
        else:
            # Print separate message for each element in the list of
-            # batcktraces
+            # backtraces
-            for subtr in tr:
+            for idx, subtr in enumerate(tr):
+                if len(tr) > 1:
+                    print("trace %d" % idx, file=sio)
                traceback.print_list(subtr, sio)
    return sio.getvalue()

--- a/theano/gpuarray/basic_ops.py
+++ b/theano/gpuarray/basic_ops.py
@@ -576,9 +576,13 @@ class HostFromGpu(Op):
    def make_node(self, x):
        if not isinstance(x.type, GpuArrayType):
            raise TypeError(x)
-        return Apply(self, [x],
+        out_var = tensor.TensorType(dtype=x.dtype,
-                     [tensor.TensorType(dtype=x.dtype,
+                                    broadcastable=x.broadcastable)()
-                                        broadcastable=x.broadcastable)()])
+        # Keep the special comparison if there is one.
+        values_eq_approx = getattr(x.tag, 'values_eq_approx', None)
+        if values_eq_approx:
+            out_var.tag.values_eq_approx = values_eq_approx
+        return Apply(self, [x], [out_var])
    def perform(self, node, inp, out):
        x, = inp
@@ -664,9 +668,14 @@ class GpuFromHost(Op):
            raise TypeError(x)
        if "complex" in x.dtype:
            raise TypeError("complex not supported in the new gpuarray back-end.", x)
-        return Apply(self, [x], [GpuArrayType(broadcastable=x.broadcastable,
+        out_var = GpuArrayType(broadcastable=x.broadcastable,
-                                              context_name=self.context_name,
+                               context_name=self.context_name,
-                                              dtype=x.dtype)()])
+                               dtype=x.dtype)()
+        # Keep the special comparison if there is one.
+        values_eq_approx = getattr(x.tag, 'values_eq_approx', None)
+        if values_eq_approx:
+            out_var.tag.values_eq_approx = values_eq_approx
+        return Apply(self, [x], [out_var])
    def get_params(self, node):
        return get_context(self.context_name)

--- a/theano/gpuarray/sort.py
+++ b/theano/gpuarray/sort.py
@@ -2,6 +2,9 @@ from __future__ import absolute_import, print_function, division
 import os
 from string import Template
+import numpy as np
+import theano
 from theano import Apply
 from theano.tensor import as_tensor_variable
 from theano.tensor.sort import TopKOp
@@ -21,8 +24,10 @@ except ImportError as e:
 # TODO GPU sort / argsort
 class GpuTopKOp(GpuKernelBase, TopKOp):
-    '''
+    '''Implements TopKOp on gpu
-    Implements TopKOp on gpu
+    Currently the output seem sorted, but we do not test it. So as on
+    the CPU, we only support sorted=False for now.
    '''
    __props__ = TopKOp.__props__
@@ -35,6 +40,9 @@ class GpuTopKOp(GpuKernelBase, TopKOp):
        return_values=True,
        return_indices=True
    ):
+        if sorted:
+            raise NotImplementedError(
+                "GpuTopK currently is not sure to give sorted output even if they look sorted..")
        GpuKernelBase.__init__(self)
        TopKOp.__init__(
            self, axis=axis,
@@ -43,6 +51,9 @@ class GpuTopKOp(GpuKernelBase, TopKOp):
            return_values=return_values,
            return_indices=return_indices)
+    def perform(self, node, inputs, output_storage, params):
+        raise NotImplementedError()
    def c_headers(self):
        return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
@@ -325,6 +336,21 @@ class GpuTopKOp(GpuKernelBase, TopKOp):
        return node.inputs[0].type.context
+class ValuesEqApproxNoOrder():
+    """
+    We ignore the order of elements on a given axis during the comparison.
+    """
+    def __init__(self, axis):
+        self.axis = axis
+    def __call__(self, val1, val2):
+        v1 = np.sort(val1, axis=self.axis)
+        v2 = np.sort(val2, axis=self.axis)
+        ret = theano.tensor.type.values_eq_approx(v1, v2)
+        return ret
 @register_opt('fast_compile')
 @op_lifter([TopKOp], cuda_only=True)
 @register_opt2([TopKOp], 'fast_compile')
@@ -334,12 +360,16 @@ def local_gpua_topkop(op, ctx_name, inputs, outputs):
    ri = op.return_indices
    x, k = inputs
    x = as_gpuarray_variable(x, ctx_name)
+    if op.sorted:
+        return
    gpu_op = GpuTopKOp(
        axis=axis,
        sorted=op.sorted,
        idx_dtype=op.idx_dtype,
        return_values=rv,
        return_indices=ri)
-    rets = gpu_op(x, k)
+    rets = gpu_op(x, k, return_list=True)
+    c = ValuesEqApproxNoOrder(axis)
+    for r in rets:
+        r.tag.values_eq_approx = c
    return rets
--- a/theano/gpuarray/tests/test_dnn.py
+++ b/theano/gpuarray/tests/test_dnn.py
@@ -1296,6 +1296,8 @@ def test_conv3d_bwd():
        # Raise tolerance for float16
        if theano.config.floatX == 'float16':
            rtol = 5e-2
+        elif max(inputs_shape) > 1024 or max(filters_shape) > 1024:
+            rtol = 2e-5
        utt.assert_allclose(res_ref[0], res[0], rtol=rtol)
        utt.assert_allclose(res_ref[1], res[1], rtol=rtol)

--- a/theano/gpuarray/tests/test_elemwise.py
+++ b/theano/gpuarray/tests/test_elemwise.py
@@ -59,7 +59,6 @@ def test_elemwise_pow():
            assert exp.dtype == dtype_exp
            output = base ** exp
            f = theano.function([base], output, mode=mode_with_gpu)
-            theano.printing.debugprint(f)
            # We don't transfer to the GPU when the output dtype is int*
            n = len([n for n in f.maker.fgraph.apply_nodes
                     if isinstance(n.op, GpuElemwise)])

--- a/theano/tensor/nnet/opt.py
+++ b/theano/tensor/nnet/opt.py
@@ -490,7 +490,7 @@ def local_abstractconv_check(node):
                            AbstractConv3d,
                            AbstractConv3d_gradWeights,
                            AbstractConv3d_gradInputs)):
-        raise AssertionError(
+        raise gof.opt.LocalMetaOptimizerSkipAssertionError(
            '%s Theano optimization failed: there is no implementation '
            'available supporting the requested options. Did you exclude '
            'both "conv_dnn" and "conv_gemm" from the optimizer? If on GPU, '

--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -7567,4 +7567,5 @@ def local_useless_topk(node):
        idx_dtype=op.idx_dtype,
        return_values=ret_val,
        return_indices=ret_idx)(x, k)
+    copy_stack_trace(node.outputs[0], new_output)
    return {old_output: new_output}
--- a/theano/tensor/sort.py
+++ b/theano/tensor/sort.py
@@ -287,8 +287,7 @@ def _topk_py_impl(op, x, k, axis, idx_dtype):
 class TopKOp(theano.Op):
-    """
+    """Operations related to finding k-largest elements.
-    Operations related to finding k-largest elements.
    Parameters
    ----------
@@ -309,14 +308,18 @@ class TopKOp(theano.Op):
    Notes
    -----
-    - By default, this Op give two outputs: values and indices. However optimizer may
+    - CPU and GPU ops don't produce same output order. This is expected.
-      remove a certain output if not needed.
+    - The output order is not guaranteed. On the CPU, we use
+      ``np.partition`` and ``np.argpartition`` that only make sure the
-    - Computing gradient is only possible when both values and indices are computed in
+      k-th element is the correct one and that the other
+      elements are on the correct side. On the GPU, they
+      look sorted, but we do not test the correctness of this behavior.
+    - By default, this Op gives two outputs: values and indices. However
+      optimizers may remove a certain output if not needed.
+    - Computing the gradient requests the computation of the indices in
      forward pass.
+    - If the top-k-th value is not unique, we cannot guarantee the
-    - If the top-k-th value is not unique, we cannot guarantee the output indices being
+      output indices being deterministically chosen.
-      deterministically chosen.
    See Also
    --------
@@ -354,6 +357,9 @@ class TopKOp(theano.Op):
        if not isinstance(axis, int):
            raise TypeError(
                '"axis" parameter must be integer, got "%s"' % type(axis))
+        if sorted:
+            raise NotImplementedError(
+                "The sorted parameter is not yet implemented. Use sorted=False for now.")
        if idx_dtype not in theano.tensor.integer_dtypes:
            raise TypeError(
                '"idx_dtype" parameter must be an integer dtype, got "%s"' % idx_dtype)
@@ -473,9 +479,6 @@ def topk(x, kth, axis=-1, sorted=True, idx_dtype='int64'):
    - ``sorted=True`` is not supported yet.
    """
-    if sorted:
-        raise NotImplementedError(
-            "We are still working on sorted topk. Use sorted=False for now.")
    if axis is None:
        x = theano.tensor.flatten(x)
        axis = 0
@@ -523,9 +526,6 @@ def argtopk(x, kth, axis=-1, sorted=True, idx_dtype='int64'):
      indices are deterministically chosen.
    """
-    if sorted:
-        raise NotImplementedError(
-            "We are still working on sorted topk. Use sorted=False for now.")
    if axis is None:
        x = theano.tensor.flatten(x)
        axis = 0
@@ -546,9 +546,6 @@ def topk_and_argtopk(x, kth, axis=-1, sorted=True, idx_dtype='int64'):
    tuple: (values, indices)
    """
-    if sorted:
-        raise NotImplementedError(
-            "We are still working on sorted topk. Use sorted=False for now.")
    if axis is None:
        x = theano.tensor.flatten(x)
        axis = 0

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -820,7 +820,7 @@ def test_maximum_minimum_grad():
    for op in [tensor.maximum, tensor.minimum]:
        o = op(x, y)
        g = theano.grad(o.sum(), [x, y])
-        theano.printing.debugprint(g)
        f = theano.function([x, y], g)
        assert np.allclose(f([1], [1]), [[1], [0]])
@@ -6583,8 +6583,8 @@ class test_tensordot(unittest.TestCase):
            f3 = inplace_func([amat, bmat], c)
            aval = rand(4, 7)
            bval = rand(7, 9)
-            self.assertTrue(np.allclose(np.tensordot(aval, bval, axes),
+            utt.assert_allclose(np.tensordot(aval, bval, axes),
-                                        f3(aval, bval)))
+                                f3(aval, bval))
            utt.verify_grad(self.TensorDot(axes), [aval, bval])
    def test_scalar_axes(self):
@@ -7789,7 +7789,7 @@ class TestSpecifyShape(unittest.TestCase):
        f(xval)
        xval = np.random.rand(3).astype(floatX)
        self.assertRaises(AssertionError, f, xval)
-        theano.printing.debugprint(f)
        assert isinstance([n for n in f.maker.fgraph.toposort()
                           if isinstance(n.op, SpecifyShape)][0].inputs[0].type,
                          self.input_type)

--- a/theano/tensor/tests/test_opt.py
+++ b/theano/tensor/tests/test_opt.py
@@ -1116,7 +1116,6 @@ class test_fusion(unittest.TestCase):
                 nb_elemwise, answer, out_dtype] in enumerate(cases):
            if isinstance(out_dtype, dict):
                out_dtype = out_dtype[config.cast_policy]
-            print("new cases", id)
            if shared_fn is None:
                f = compile.function(list(sym_inputs), g, mode=mode)
@@ -1139,6 +1138,7 @@ class test_fusion(unittest.TestCase):
                atol = 1e-6
            if not np.allclose(out, answer * nb_repeat, atol=atol):
                fail1.append(id)
+                print("cases", id)
                print(val_inputs)
                print(out)
                print(answer * nb_repeat)
@@ -1163,8 +1163,8 @@ class test_fusion(unittest.TestCase):
                fail4.append((id, out_dtype, out.dtype))
        failed = len(fail1 + fail2 + fail3 + fail4)
-        print("Executed", len(cases), "cases", "failed", failed)
        if failed > 0:
+            print("Executed", len(cases), "cases", "failed", failed)
            raise Exception("Failed %d cases" % failed, fail1,
                            fail2, fail3, fail4)

--- a/theano/tensor/tests/test_sort.py
+++ b/theano/tensor/tests/test_sort.py
@@ -373,7 +373,12 @@ class Test_TopK(unittest.TestCase):
        x = theano.tensor.vector(name='x', dtype=dtype)
        y = argtopk(x, k, sorted=sorted, idx_dtype='int32')
-        fn = theano.function([x], y, mode=self.mode)
+        # DebugMode won't like the index change on collision on CPU
+        # So don't use DebugMode here.
+        mode = self.mode
+        if isinstance(self.mode, theano.compile.DebugMode):
+            mode = theano.Mode(optimizer=mode.optimizer)
+        fn = theano.function([x], y, mode=mode)
        assert any([isinstance(n.op, self.op_class) for n in fn.maker.fgraph.apply_nodes])
        xval = np.repeat(np.random.uniform(-100., 100., size=size // 2).astype(dtype), 2)
        xval = xval[np.random.permutation(size)]