提交 189069be authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #4070 from abergeron/fix_buildbot2

Fix the LogSoftmax tests in DebugMode.
......@@ -460,5 +460,5 @@ def use_gpu_cumsum(node):
axis = 0
ret = host_from_gpu(GpuCumsum(axis)(x))
ret.values_eq_approx = values_eq_approx_high_tol
ret.tag.values_eq_approx = values_eq_approx_high_tol
return [ret]
......@@ -1550,7 +1550,7 @@ def local_gpu_conv(node):
gpu_from_host(kern))
out = tensor.patternbroadcast(out,
node.outputs[0].broadcastable)
out.values_eq_approx = values_eq_approx_high_tol
out.tag.values_eq_approx = values_eq_approx_high_tol
# in some cases the ConvOp broadcasts the last 2 dimensions
# differently than the gpu ConvOp
return [out]
......@@ -1569,7 +1569,7 @@ def local_gpu_conv(node):
out = tensor.patternbroadcast(
host_from_gpu(out),
node.outputs[0].broadcastable)
out.values_eq_approx = values_eq_approx_high_tol
out.tag.values_eq_approx = values_eq_approx_high_tol
# in some cases the ConvOp broadcasts the last 2 dimensions
# differently than the gpu ConvOp
return [out]
......@@ -2697,7 +2697,7 @@ def local_conv2d_gpu_conv(node):
# out is on the GPU because both inputs are.
out = theano.tensor.patternbroadcast(out,
node.outputs[0].broadcastable)
out.values_eq_approx = values_eq_approx_high_tol
out.tag.values_eq_approx = values_eq_approx_high_tol
return [out]
if isinstance(node.op, BaseAbstractConv2d):
......@@ -2724,7 +2724,7 @@ def local_conv2d_gpu_conv(node):
out = theano.tensor.patternbroadcast(
out,
node.outputs[0].broadcastable)
out.values_eq_approx = values_eq_approx_high_tol
out.tag.values_eq_approx = values_eq_approx_high_tol
# If the original output was on CPU, we have to transfer it
if isinstance(node.outputs[0].type, tensor.TensorType):
return [tensor.as_tensor_variable(out)]
......
......@@ -19,9 +19,9 @@ from six.moves import xrange
import theano
from theano import gof
from theano import scalar
from theano.tensor import basic as tensor
from theano.tensor import subtensor
from theano.tensor import opt
from theano.tensor import basic as tensor, subtensor, opt
from theano.tensor.type import (values_eq_approx_remove_inf,
values_eq_approx_remove_nan)
from theano.tensor.opt import copy_stack_trace
from theano.compile import optdb
from theano.gof import Apply
......@@ -751,7 +751,9 @@ def local_logsoftmax(node):
isinstance(node.inputs[0].owner.op, Softmax)):
inVars = node.inputs[0].owner.inputs[0]
new_op = LogSoftmax()
return [new_op(inVars)]
ret = new_op(inVars)
ret.tag.values_eq_approx = values_eq_approx_remove_inf
return [ret]
@opt.register_specialize('stabilize', 'fast_compile')
......@@ -784,7 +786,9 @@ def local_logsoftmax_grad(node):
if grads.broadcastable[1] and not sm.broadcastable[1]:
grads = tensor.alloc(grads, grads.shape[0], sm.shape[1])
return [grads - tensor.sum(grads, axis=1, keepdims=True) * sm]
ret = grads - tensor.sum(grads, axis=1, keepdims=True) * sm
ret.tag.values_eq_approx = values_eq_approx_remove_nan
return [ret]
def softmax_graph(c):
......
......@@ -268,7 +268,7 @@ def local_ultra_fast_sigmoid(node):
# Other test could fail without good reason.
return tensor.TensorType.values_eq_approx(a, b, atol=0.02)
# Let DebugMode know that this opt only approximates the values.
out.values_eq_approx = values_eq_approx_remove_low_prec
out.tag.values_eq_approx = values_eq_approx_remove_low_prec
return [out]
theano.compile.optdb['uncanonicalize'].register("local_ultra_fast_sigmoid",
local_ultra_fast_sigmoid)
......@@ -307,7 +307,7 @@ def local_hard_sigmoid(node):
# Other test could fail without good reason.
return tensor.TensorType.values_eq_approx(a, b, atol=0.1)
# Let DebugMode know that this opt only approximates the values.
out.values_eq_approx = values_eq_approx_remove_low_prec
out.tag.values_eq_approx = values_eq_approx_remove_low_prec
return [out]
theano.compile.optdb['uncanonicalize'].register("local_hard_sigmoid",
local_hard_sigmoid)
......
......@@ -190,6 +190,9 @@ class T_LogSoftmax(utt.InferShapeTester):
utt.verify_grad(f, [numpy.random.rand(4)])
def test_allclose(self):
m = theano.config.mode
m = theano.compile.get_mode(m)
m.check_isfinite = False
x, y = tensor.matrices('xy')
# regular softmax and crossentropy
sm = tensor.nnet.softmax(x)
......@@ -215,7 +218,7 @@ class T_LogSoftmax(utt.InferShapeTester):
# now show that the two versions result in the same crossentropy cost
# this indicates that the forward function does provide some numerical
# stability
f2 = theano.function([x, y], [cm, cm2])
f2 = theano.function([x, y], [cm, cm2], mode=m)
cm_, cm2_ = f2(a, b)
utt.assert_allclose(cm_, cm2_)
......@@ -249,6 +252,9 @@ class T_LogSoftmax(utt.InferShapeTester):
grad and that the new operation does not explode for big inputs.
Note that only the grad is checked.
"""
m = theano.config.mode
m = theano.compile.get_mode(m)
m.check_isfinite = False
# some inputs that are large to make the gradient explode in the non
# optimized case
a = numpy.exp(10*numpy.random.rand(5, 10).astype(theano.config.floatX))
......@@ -258,7 +264,7 @@ class T_LogSoftmax(utt.InferShapeTester):
logsm = tensor.log(sm)
return logsm
# We set step to 0.1 because for big values we need a big epsilon
utt.verify_grad(myfunc, [a], eps=0.1)
utt.verify_grad(myfunc, [a], eps=0.1, mode=m)
class T_SoftmaxGrad(utt.InferShapeTester):
......
......@@ -3672,7 +3672,7 @@ def local_mul_switch_sink(node):
fct = [T.switch(switch.inputs[0], 0,
fmul)]
fct[0].values_eq_approx = values_eq_approx_remove_nan
fct[0].tag.values_eq_approx = values_eq_approx_remove_nan
# Copy over stacktrace for switch op from both previous
# elementwise multiplication op and previous switch op,
......@@ -3696,7 +3696,7 @@ def local_mul_switch_sink(node):
fct = [T.switch(switch.inputs[0],
fmul, 0)]
fct[0].values_eq_approx = values_eq_approx_remove_nan
fct[0].tag.values_eq_approx = values_eq_approx_remove_nan
# Copy over stacktrace for switch op from both previous
# elementwise multiplication op and previous switch op,
......@@ -3740,7 +3740,7 @@ def local_div_switch_sink(node):
fct = [T.switch(switch.inputs[0], 0,
fdiv)]
fct[0].values_eq_approx = values_eq_approx_remove_nan
fct[0].tag.values_eq_approx = values_eq_approx_remove_nan
# Copy over stacktrace for switch op from both previous
# elementwise division op and previous switch op,
......@@ -3762,7 +3762,7 @@ def local_div_switch_sink(node):
fct = [T.switch(switch.inputs[0],
fdiv, 0)]
fct[0].values_eq_approx = values_eq_approx_remove_nan
fct[0].tag.values_eq_approx = values_eq_approx_remove_nan
# Copy over stacktrace for switch op from both previous
# elementwise division op and previous switch op,
......@@ -5566,7 +5566,7 @@ def local_log_add(node):
ret = max_pre + T.log1p(T.exp(T.add(*[p - max_pre
for p in pre_exp])))
ret.values_eq_approx = values_eq_approx_remove_inf
ret.tag.values_eq_approx = values_eq_approx_remove_inf
return [ret]
......@@ -5990,7 +5990,7 @@ def local_log_erfc(node):
threshold = 26.641747557
ret = T.switch(x < threshold, node.outputs[0], stab_value)
ret.values_eq_approx = values_eq_approx_remove_inf
ret.tag.values_eq_approx = values_eq_approx_remove_inf
return [ret]
......@@ -6142,7 +6142,7 @@ def local_grad_log_erfc_neg(node):
ret = T.switch(x < threshold, true_div_no_mul, stab_value)
if y:
ret = T.mul(ret, *y)
ret.values_eq_approx = values_eq_approx_remove_inf_nan
ret.tag.values_eq_approx = values_eq_approx_remove_inf_nan
return [ret]
"""
The libm used for the test is amdlibm
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论