提交 189069be 作者: Frédéric Bastien

Merge pull request #4070 from abergeron/fix_buildbot2

Fix the LogSoftmax tests in DebugMode.
...@@ -460,5 +460,5 @@ def use_gpu_cumsum(node): ...@@ -460,5 +460,5 @@ def use_gpu_cumsum(node):
axis = 0 axis = 0
ret = host_from_gpu(GpuCumsum(axis)(x)) ret = host_from_gpu(GpuCumsum(axis)(x))
ret.values_eq_approx = values_eq_approx_high_tol ret.tag.values_eq_approx = values_eq_approx_high_tol
return [ret] return [ret]
...@@ -1550,7 +1550,7 @@ def local_gpu_conv(node): ...@@ -1550,7 +1550,7 @@ def local_gpu_conv(node):
gpu_from_host(kern)) gpu_from_host(kern))
out = tensor.patternbroadcast(out, out = tensor.patternbroadcast(out,
node.outputs[0].broadcastable) node.outputs[0].broadcastable)
out.values_eq_approx = values_eq_approx_high_tol out.tag.values_eq_approx = values_eq_approx_high_tol
# in some cases the ConvOp broadcasts the last 2 dimensions # in some cases the ConvOp broadcasts the last 2 dimensions
# differently than the gpu ConvOp # differently than the gpu ConvOp
return [out] return [out]
...@@ -1569,7 +1569,7 @@ def local_gpu_conv(node): ...@@ -1569,7 +1569,7 @@ def local_gpu_conv(node):
out = tensor.patternbroadcast( out = tensor.patternbroadcast(
host_from_gpu(out), host_from_gpu(out),
node.outputs[0].broadcastable) node.outputs[0].broadcastable)
out.values_eq_approx = values_eq_approx_high_tol out.tag.values_eq_approx = values_eq_approx_high_tol
# in some cases the ConvOp broadcasts the last 2 dimensions # in some cases the ConvOp broadcasts the last 2 dimensions
# differently than the gpu ConvOp # differently than the gpu ConvOp
return [out] return [out]
...@@ -2697,7 +2697,7 @@ def local_conv2d_gpu_conv(node): ...@@ -2697,7 +2697,7 @@ def local_conv2d_gpu_conv(node):
# out is on the GPU because both inputs are. # out is on the GPU because both inputs are.
out = theano.tensor.patternbroadcast(out, out = theano.tensor.patternbroadcast(out,
node.outputs[0].broadcastable) node.outputs[0].broadcastable)
out.values_eq_approx = values_eq_approx_high_tol out.tag.values_eq_approx = values_eq_approx_high_tol
return [out] return [out]
if isinstance(node.op, BaseAbstractConv2d): if isinstance(node.op, BaseAbstractConv2d):
...@@ -2724,7 +2724,7 @@ def local_conv2d_gpu_conv(node): ...@@ -2724,7 +2724,7 @@ def local_conv2d_gpu_conv(node):
out = theano.tensor.patternbroadcast( out = theano.tensor.patternbroadcast(
out, out,
node.outputs[0].broadcastable) node.outputs[0].broadcastable)
out.values_eq_approx = values_eq_approx_high_tol out.tag.values_eq_approx = values_eq_approx_high_tol
# If the original output was on CPU, we have to transfer it # If the original output was on CPU, we have to transfer it
if isinstance(node.outputs[0].type, tensor.TensorType): if isinstance(node.outputs[0].type, tensor.TensorType):
return [tensor.as_tensor_variable(out)] return [tensor.as_tensor_variable(out)]
......
...@@ -19,9 +19,9 @@ from six.moves import xrange ...@@ -19,9 +19,9 @@ from six.moves import xrange
import theano import theano
from theano import gof from theano import gof
from theano import scalar from theano import scalar
from theano.tensor import basic as tensor from theano.tensor import basic as tensor, subtensor, opt
from theano.tensor import subtensor from theano.tensor.type import (values_eq_approx_remove_inf,
from theano.tensor import opt values_eq_approx_remove_nan)
from theano.tensor.opt import copy_stack_trace from theano.tensor.opt import copy_stack_trace
from theano.compile import optdb from theano.compile import optdb
from theano.gof import Apply from theano.gof import Apply
...@@ -751,7 +751,9 @@ def local_logsoftmax(node): ...@@ -751,7 +751,9 @@ def local_logsoftmax(node):
isinstance(node.inputs[0].owner.op, Softmax)): isinstance(node.inputs[0].owner.op, Softmax)):
inVars = node.inputs[0].owner.inputs[0] inVars = node.inputs[0].owner.inputs[0]
new_op = LogSoftmax() new_op = LogSoftmax()
return [new_op(inVars)] ret = new_op(inVars)
ret.tag.values_eq_approx = values_eq_approx_remove_inf
return [ret]
@opt.register_specialize('stabilize', 'fast_compile') @opt.register_specialize('stabilize', 'fast_compile')
...@@ -784,7 +786,9 @@ def local_logsoftmax_grad(node): ...@@ -784,7 +786,9 @@ def local_logsoftmax_grad(node):
if grads.broadcastable[1] and not sm.broadcastable[1]: if grads.broadcastable[1] and not sm.broadcastable[1]:
grads = tensor.alloc(grads, grads.shape[0], sm.shape[1]) grads = tensor.alloc(grads, grads.shape[0], sm.shape[1])
return [grads - tensor.sum(grads, axis=1, keepdims=True) * sm] ret = grads - tensor.sum(grads, axis=1, keepdims=True) * sm
ret.tag.values_eq_approx = values_eq_approx_remove_nan
return [ret]
def softmax_graph(c): def softmax_graph(c):
......
...@@ -268,7 +268,7 @@ def local_ultra_fast_sigmoid(node): ...@@ -268,7 +268,7 @@ def local_ultra_fast_sigmoid(node):
# Other test could fail without good reason. # Other test could fail without good reason.
return tensor.TensorType.values_eq_approx(a, b, atol=0.02) return tensor.TensorType.values_eq_approx(a, b, atol=0.02)
# Let DebugMode know that this opt approximates the values. # Let DebugMode know that this opt approximates the values.
out.values_eq_approx = values_eq_approx_remove_low_prec out.tag.values_eq_approx = values_eq_approx_remove_low_prec
return [out] return [out]
theano.compile.optdb['uncanonicalize'].register("local_ultra_fast_sigmoid", theano.compile.optdb['uncanonicalize'].register("local_ultra_fast_sigmoid",
local_ultra_fast_sigmoid) local_ultra_fast_sigmoid)
...@@ -307,7 +307,7 @@ def local_hard_sigmoid(node): ...@@ -307,7 +307,7 @@ def local_hard_sigmoid(node):
# Other test could fail without good reason. # Other test could fail without good reason.
return tensor.TensorType.values_eq_approx(a, b, atol=0.1) return tensor.TensorType.values_eq_approx(a, b, atol=0.1)
# Let DebugMode know that this opt approximates the values. # Let DebugMode know that this opt approximates the values.
out.values_eq_approx = values_eq_approx_remove_low_prec out.tag.values_eq_approx = values_eq_approx_remove_low_prec
return [out] return [out]
theano.compile.optdb['uncanonicalize'].register("local_hard_sigmoid", theano.compile.optdb['uncanonicalize'].register("local_hard_sigmoid",
local_hard_sigmoid) local_hard_sigmoid)
......
...@@ -190,6 +190,9 @@ class T_LogSoftmax(utt.InferShapeTester): ...@@ -190,6 +190,9 @@ class T_LogSoftmax(utt.InferShapeTester):
utt.verify_grad(f, [numpy.random.rand(4)]) utt.verify_grad(f, [numpy.random.rand(4)])
def test_allclose(self): def test_allclose(self):
m = theano.config.mode
m = theano.compile.get_mode(m)
m.check_isfinite = False
x, y = tensor.matrices('xy') x, y = tensor.matrices('xy')
# regular softmax and crossentropy # regular softmax and crossentropy
sm = tensor.nnet.softmax(x) sm = tensor.nnet.softmax(x)
...@@ -215,7 +218,7 @@ class T_LogSoftmax(utt.InferShapeTester): ...@@ -215,7 +218,7 @@ class T_LogSoftmax(utt.InferShapeTester):
# now show that the two versions result in the same crossentropy cost # now show that the two versions result in the same crossentropy cost
# this indicates that the forward function does provide some numerical # this indicates that the forward function does provide some numerical
# stability # stability
f2 = theano.function([x, y], [cm, cm2]) f2 = theano.function([x, y], [cm, cm2], mode=m)
cm_, cm2_ = f2(a, b) cm_, cm2_ = f2(a, b)
utt.assert_allclose(cm_, cm2_) utt.assert_allclose(cm_, cm2_)
...@@ -249,6 +252,9 @@ class T_LogSoftmax(utt.InferShapeTester): ...@@ -249,6 +252,9 @@ class T_LogSoftmax(utt.InferShapeTester):
grad and that the new operation does not explode for big inputs. grad and that the new operation does not explode for big inputs.
Note that only the grad is checked. Note that only the grad is checked.
""" """
m = theano.config.mode
m = theano.compile.get_mode(m)
m.check_isfinite = False
# some inputs that are large to make the gradient explode in the non # some inputs that are large to make the gradient explode in the non
# optimized case # optimized case
a = numpy.exp(10*numpy.random.rand(5, 10).astype(theano.config.floatX)) a = numpy.exp(10*numpy.random.rand(5, 10).astype(theano.config.floatX))
...@@ -258,7 +264,7 @@ class T_LogSoftmax(utt.InferShapeTester): ...@@ -258,7 +264,7 @@ class T_LogSoftmax(utt.InferShapeTester):
logsm = tensor.log(sm) logsm = tensor.log(sm)
return logsm return logsm
# We set step to 0.1 because for big values we need a big epsilon # We set step to 0.1 because for big values we need a big epsilon
utt.verify_grad(myfunc, [a], eps=0.1) utt.verify_grad(myfunc, [a], eps=0.1, mode=m)
class T_SoftmaxGrad(utt.InferShapeTester): class T_SoftmaxGrad(utt.InferShapeTester):
......
...@@ -3672,7 +3672,7 @@ def local_mul_switch_sink(node): ...@@ -3672,7 +3672,7 @@ def local_mul_switch_sink(node):
fct = [T.switch(switch.inputs[0], 0, fct = [T.switch(switch.inputs[0], 0,
fmul)] fmul)]
fct[0].values_eq_approx = values_eq_approx_remove_nan fct[0].tag.values_eq_approx = values_eq_approx_remove_nan
# Copy over stacktrace for switch op from both previous # Copy over stacktrace for switch op from both previous
# elementwise multiplication op and previous switch op, # elementwise multiplication op and previous switch op,
...@@ -3696,7 +3696,7 @@ def local_mul_switch_sink(node): ...@@ -3696,7 +3696,7 @@ def local_mul_switch_sink(node):
fct = [T.switch(switch.inputs[0], fct = [T.switch(switch.inputs[0],
fmul, 0)] fmul, 0)]
fct[0].values_eq_approx = values_eq_approx_remove_nan fct[0].tag.values_eq_approx = values_eq_approx_remove_nan
# Copy over stacktrace for switch op from both previous # Copy over stacktrace for switch op from both previous
# elementwise multiplication op and previous switch op, # elementwise multiplication op and previous switch op,
...@@ -3740,7 +3740,7 @@ def local_div_switch_sink(node): ...@@ -3740,7 +3740,7 @@ def local_div_switch_sink(node):
fct = [T.switch(switch.inputs[0], 0, fct = [T.switch(switch.inputs[0], 0,
fdiv)] fdiv)]
fct[0].values_eq_approx = values_eq_approx_remove_nan fct[0].tag.values_eq_approx = values_eq_approx_remove_nan
# Copy over stacktrace for switch op from both previous # Copy over stacktrace for switch op from both previous
# elementwise division op and previous switch op, # elementwise division op and previous switch op,
...@@ -3762,7 +3762,7 @@ def local_div_switch_sink(node): ...@@ -3762,7 +3762,7 @@ def local_div_switch_sink(node):
fct = [T.switch(switch.inputs[0], fct = [T.switch(switch.inputs[0],
fdiv, 0)] fdiv, 0)]
fct[0].values_eq_approx = values_eq_approx_remove_nan fct[0].tag.values_eq_approx = values_eq_approx_remove_nan
# Copy over stacktrace for switch op from both previous # Copy over stacktrace for switch op from both previous
# elementwise division op and previous switch op, # elementwise division op and previous switch op,
...@@ -5566,7 +5566,7 @@ def local_log_add(node): ...@@ -5566,7 +5566,7 @@ def local_log_add(node):
ret = max_pre + T.log1p(T.exp(T.add(*[p - max_pre ret = max_pre + T.log1p(T.exp(T.add(*[p - max_pre
for p in pre_exp]))) for p in pre_exp])))
ret.values_eq_approx = values_eq_approx_remove_inf ret.tag.values_eq_approx = values_eq_approx_remove_inf
return [ret] return [ret]
...@@ -5990,7 +5990,7 @@ def local_log_erfc(node): ...@@ -5990,7 +5990,7 @@ def local_log_erfc(node):
threshold = 26.641747557 threshold = 26.641747557
ret = T.switch(x < threshold, node.outputs[0], stab_value) ret = T.switch(x < threshold, node.outputs[0], stab_value)
ret.values_eq_approx = values_eq_approx_remove_inf ret.tag.values_eq_approx = values_eq_approx_remove_inf
return [ret] return [ret]
...@@ -6142,7 +6142,7 @@ def local_grad_log_erfc_neg(node): ...@@ -6142,7 +6142,7 @@ def local_grad_log_erfc_neg(node):
ret = T.switch(x < threshold, true_div_no_mul, stab_value) ret = T.switch(x < threshold, true_div_no_mul, stab_value)
if y: if y:
ret = T.mul(ret, *y) ret = T.mul(ret, *y)
ret.values_eq_approx = values_eq_approx_remove_inf_nan ret.tag.values_eq_approx = values_eq_approx_remove_inf_nan
return [ret] return [ret]
""" """
The libm used for the test is amdlibm The libm used for the test is amdlibm
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论