Merge pull request #4070 from abergeron/fix_buildbot2

Fix the LogSoftmax tests in DebugMode.

Merge pull request #4070 from abergeron/fix_buildbot2
189069be · Frédéric Bastien · 6b02f8ca · 68880f84 · 189069be · 189069be
--- a/theano/sandbox/cuda/extra_ops.py
+++ b/theano/sandbox/cuda/extra_ops.py
@@ -460,5 +460,5 @@ def use_gpu_cumsum(node):
            axis = 0

        ret = host_from_gpu(GpuCumsum(axis)(x))
-        ret.values_eq_approx = values_eq_approx_high_tol
+        ret.tag.values_eq_approx = values_eq_approx_high_tol
        return [ret]
--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1550,7 +1550,7 @@ def local_gpu_conv(node):
                           gpu_from_host(kern))
            out = tensor.patternbroadcast(out,
                                          node.outputs[0].broadcastable)
-            out.values_eq_approx = values_eq_approx_high_tol
+            out.tag.values_eq_approx = values_eq_approx_high_tol
            # in some case the ConvOp broadcast the last 2 dimensions
            # differently then the gpu ConvOp
            return [out]
@@ -1569,7 +1569,7 @@ def local_gpu_conv(node):
            out = tensor.patternbroadcast(
                host_from_gpu(out),
                node.outputs[0].broadcastable)
-            out.values_eq_approx = values_eq_approx_high_tol
+            out.tag.values_eq_approx = values_eq_approx_high_tol
            # in some case the ConvOp broadcast the last 2 dimensions
            # differently then the gpu ConvOp
            return [out]
@@ -2697,7 +2697,7 @@ def local_conv2d_gpu_conv(node):
            # out is on the GPU because both inputs are.
            out = theano.tensor.patternbroadcast(out,
                                                 node.outputs[0].broadcastable)
-            out.values_eq_approx = values_eq_approx_high_tol
+            out.tag.values_eq_approx = values_eq_approx_high_tol
            return [out]

    if isinstance(node.op, BaseAbstractConv2d):
@@ -2724,7 +2724,7 @@ def local_conv2d_gpu_conv(node):
            out = theano.tensor.patternbroadcast(
                out,
                node.outputs[0].broadcastable)
-            out.values_eq_approx = values_eq_approx_high_tol
+            out.tag.values_eq_approx = values_eq_approx_high_tol
            # If the original output was on CPU, we have to transfer it
            if isinstance(node.outputs[0].type, tensor.TensorType):
                return [tensor.as_tensor_variable(out)]

--- a/theano/tensor/nnet/nnet.py
+++ b/theano/tensor/nnet/nnet.py
@@ -19,9 +19,9 @@ from six.moves import xrange
 import theano
 from theano import gof
 from theano import scalar
-from theano.tensor import basic as tensor
-from theano.tensor import subtensor
-from theano.tensor import opt
+from theano.tensor import basic as tensor, subtensor, opt
+from theano.tensor.type import (values_eq_approx_remove_inf,
+                                values_eq_approx_remove_nan)
 from theano.tensor.opt import copy_stack_trace
 from theano.compile import optdb
 from theano.gof import Apply
@@ -751,7 +751,9 @@ def local_logsoftmax(node):
            isinstance(node.inputs[0].owner.op, Softmax)):
        inVars = node.inputs[0].owner.inputs[0]
        new_op = LogSoftmax()
-        return [new_op(inVars)]
+        ret = new_op(inVars)
+        ret.tag.values_eq_approx = values_eq_approx_remove_inf
+        return [ret]


 @opt.register_specialize('stabilize', 'fast_compile')
@@ -784,7 +786,9 @@ def local_logsoftmax_grad(node):
        if grads.broadcastable[1] and not sm.broadcastable[1]:
            grads = tensor.alloc(grads, grads.shape[0], sm.shape[1])

-        return [grads - tensor.sum(grads, axis=1, keepdims=True) * sm]
+        ret = grads - tensor.sum(grads, axis=1, keepdims=True) * sm
+        ret.tag.values_eq_approx = values_eq_approx_remove_nan
+        return [ret]


 def softmax_graph(c):

--- a/theano/tensor/nnet/sigm.py
+++ b/theano/tensor/nnet/sigm.py
@@ -268,7 +268,7 @@ def local_ultra_fast_sigmoid(node):
            # Other test could fail without good reason.
            return tensor.TensorType.values_eq_approx(a, b, atol=0.02)
        # Let DebugMode know that there this opt approx the values.
-        out.values_eq_approx = values_eq_approx_remove_low_prec
+        out.tag.values_eq_approx = values_eq_approx_remove_low_prec
        return [out]
 theano.compile.optdb['uncanonicalize'].register("local_ultra_fast_sigmoid",
                                                local_ultra_fast_sigmoid)
@@ -307,7 +307,7 @@ def local_hard_sigmoid(node):
            # Other test could fail without good reason.
            return tensor.TensorType.values_eq_approx(a, b, atol=0.1)
        # Let DebugMode know that there this opt approx the values.
-        out.values_eq_approx = values_eq_approx_remove_low_prec
+        out.tag.values_eq_approx = values_eq_approx_remove_low_prec
        return [out]
 theano.compile.optdb['uncanonicalize'].register("local_hard_sigmoid",
                                                local_hard_sigmoid)

--- a/theano/tensor/nnet/tests/test_nnet.py
+++ b/theano/tensor/nnet/tests/test_nnet.py
@@ -190,6 +190,9 @@ class T_LogSoftmax(utt.InferShapeTester):
        utt.verify_grad(f, [numpy.random.rand(4)])

    def test_allclose(self):
+        m = theano.config.mode
+        m = theano.compile.get_mode(m)
+        m.check_isfinite = False
        x, y = tensor.matrices('xy')
        # regular softmax and crossentropy
        sm = tensor.nnet.softmax(x)
@@ -215,7 +218,7 @@ class T_LogSoftmax(utt.InferShapeTester):
        # now show that the two versions result in the same crossentropy cost
        # this indicates that the forward function does provide some numerical
        # stability
-        f2 = theano.function([x, y], [cm, cm2])
+        f2 = theano.function([x, y], [cm, cm2], mode=m)
        cm_, cm2_ = f2(a, b)
        utt.assert_allclose(cm_, cm2_)

@@ -249,6 +252,9 @@ class T_LogSoftmax(utt.InferShapeTester):
        grad and that the new operation does not explode for big inputs.
        Note that only the grad is checked.
        """
+        m = theano.config.mode
+        m = theano.compile.get_mode(m)
+        m.check_isfinite = False
        # some inputs that are large to make the gradient explode in the non
        # optimized case
        a = numpy.exp(10*numpy.random.rand(5, 10).astype(theano.config.floatX))
@@ -258,7 +264,7 @@ class T_LogSoftmax(utt.InferShapeTester):
            logsm = tensor.log(sm)
            return logsm
        # We set step to 0.1 because for big values we need a big epsilon
-        utt.verify_grad(myfunc, [a], eps=0.1)
+        utt.verify_grad(myfunc, [a], eps=0.1, mode=m)


 class T_SoftmaxGrad(utt.InferShapeTester):

--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -3672,7 +3672,7 @@ def local_mul_switch_sink(node):

                    fct = [T.switch(switch.inputs[0], 0,
                                    fmul)]
-                    fct[0].values_eq_approx = values_eq_approx_remove_nan
+                    fct[0].tag.values_eq_approx = values_eq_approx_remove_nan

                    # Copy over stacktrace for switch op from both previous
                    #  elementwise multiplication op and previous switch op,
@@ -3696,7 +3696,7 @@ def local_mul_switch_sink(node):

                    fct = [T.switch(switch.inputs[0],
                                    fmul, 0)]
-                    fct[0].values_eq_approx = values_eq_approx_remove_nan
+                    fct[0].tag.values_eq_approx = values_eq_approx_remove_nan

                    # Copy over stacktrace for switch op from both previous
                    # elementwise multiplication op and previous switch op,
@@ -3740,7 +3740,7 @@ def local_div_switch_sink(node):

                fct = [T.switch(switch.inputs[0], 0,
                                fdiv)]
-                fct[0].values_eq_approx = values_eq_approx_remove_nan
+                fct[0].tag.values_eq_approx = values_eq_approx_remove_nan

                # Copy over stacktrace for switch op from both previous
                # elementwise division op and previous switch op,
@@ -3762,7 +3762,7 @@ def local_div_switch_sink(node):

                fct = [T.switch(switch.inputs[0],
                                fdiv, 0)]
-                fct[0].values_eq_approx = values_eq_approx_remove_nan
+                fct[0].tag.values_eq_approx = values_eq_approx_remove_nan

                # Copy over stacktrace for switch op from both previous
                # elementwise division op and previous switch op,
@@ -5566,7 +5566,7 @@ def local_log_add(node):

                ret = max_pre + T.log1p(T.exp(T.add(*[p - max_pre
                                                      for p in pre_exp])))
-                ret.values_eq_approx = values_eq_approx_remove_inf
+                ret.tag.values_eq_approx = values_eq_approx_remove_inf
                return [ret]


@@ -5990,7 +5990,7 @@ def local_log_erfc(node):
        threshold = 26.641747557

    ret = T.switch(x < threshold, node.outputs[0], stab_value)
-    ret.values_eq_approx = values_eq_approx_remove_inf
+    ret.tag.values_eq_approx = values_eq_approx_remove_inf
    return [ret]


@@ -6142,7 +6142,7 @@ def local_grad_log_erfc_neg(node):
    ret = T.switch(x < threshold, true_div_no_mul, stab_value)
    if y:
        ret = T.mul(ret, *y)
-    ret.values_eq_approx = values_eq_approx_remove_inf_nan
+    ret.tag.values_eq_approx = values_eq_approx_remove_inf_nan
    return [ret]
    """
 The libm used for the test is amdlibm