提交 2f739edc,作者:Arnaud Bergeron

Fix the LogSoftmax tests in DebugMode.

上级 666b86b3
...@@ -19,9 +19,9 @@ from six.moves import xrange ...@@ -19,9 +19,9 @@ from six.moves import xrange
import theano import theano
from theano import gof from theano import gof
from theano import scalar from theano import scalar
from theano.tensor import basic as tensor from theano.tensor import basic as tensor, subtensor, opt
from theano.tensor import subtensor from theano.tensor.type import (values_eq_approx_remove_inf,
from theano.tensor import opt values_eq_approx_remove_nan)
from theano.tensor.opt import copy_stack_trace from theano.tensor.opt import copy_stack_trace
from theano.compile import optdb from theano.compile import optdb
from theano.gof import Apply from theano.gof import Apply
...@@ -751,7 +751,9 @@ def local_logsoftmax(node): ...@@ -751,7 +751,9 @@ def local_logsoftmax(node):
isinstance(node.inputs[0].owner.op, Softmax)): isinstance(node.inputs[0].owner.op, Softmax)):
inVars = node.inputs[0].owner.inputs[0] inVars = node.inputs[0].owner.inputs[0]
new_op = LogSoftmax() new_op = LogSoftmax()
return [new_op(inVars)] ret = new_op(inVars)
ret.values_eq_approx = values_eq_approx_remove_inf
return [ret]
@opt.register_specialize('stabilize', 'fast_compile') @opt.register_specialize('stabilize', 'fast_compile')
...@@ -784,7 +786,9 @@ def local_logsoftmax_grad(node): ...@@ -784,7 +786,9 @@ def local_logsoftmax_grad(node):
if grads.broadcastable[1] and not sm.broadcastable[1]: if grads.broadcastable[1] and not sm.broadcastable[1]:
grads = tensor.alloc(grads, grads.shape[0], sm.shape[1]) grads = tensor.alloc(grads, grads.shape[0], sm.shape[1])
return [grads - tensor.sum(grads, axis=1, keepdims=True) * sm] ret = grads - tensor.sum(grads, axis=1, keepdims=True) * sm
ret.values_eq_approx = values_eq_approx_remove_nan
return [ret]
def softmax_graph(c): def softmax_graph(c):
......
...@@ -190,6 +190,9 @@ class T_LogSoftmax(utt.InferShapeTester): ...@@ -190,6 +190,9 @@ class T_LogSoftmax(utt.InferShapeTester):
utt.verify_grad(f, [numpy.random.rand(4)]) utt.verify_grad(f, [numpy.random.rand(4)])
def test_allclose(self): def test_allclose(self):
m = theano.config.mode
m = theano.compile.get_mode(m)
m.check_isfinite = False
x, y = tensor.matrices('xy') x, y = tensor.matrices('xy')
# regular softmax and crossentropy # regular softmax and crossentropy
sm = tensor.nnet.softmax(x) sm = tensor.nnet.softmax(x)
...@@ -215,7 +218,7 @@ class T_LogSoftmax(utt.InferShapeTester): ...@@ -215,7 +218,7 @@ class T_LogSoftmax(utt.InferShapeTester):
# now show that the two versions result in the same crossentropy cost # now show that the two versions result in the same crossentropy cost
# this indicates that the forward function does provide some numerical # this indicates that the forward function does provide some numerical
# stability # stability
f2 = theano.function([x, y], [cm, cm2]) f2 = theano.function([x, y], [cm, cm2], mode=m)
cm_, cm2_ = f2(a, b) cm_, cm2_ = f2(a, b)
utt.assert_allclose(cm_, cm2_) utt.assert_allclose(cm_, cm2_)
...@@ -249,6 +252,9 @@ class T_LogSoftmax(utt.InferShapeTester): ...@@ -249,6 +252,9 @@ class T_LogSoftmax(utt.InferShapeTester):
grad and that the new operation does not explode for big inputs. grad and that the new operation does not explode for big inputs.
Note that only the grad is checked. Note that only the grad is checked.
""" """
m = theano.config.mode
m = theano.compile.get_mode(m)
m.check_isfinite = False
# some inputs that are large to make the gradient explode in the non # some inputs that are large to make the gradient explode in the non
# optimized case # optimized case
a = numpy.exp(10*numpy.random.rand(5, 10).astype(theano.config.floatX)) a = numpy.exp(10*numpy.random.rand(5, 10).astype(theano.config.floatX))
...@@ -258,7 +264,7 @@ class T_LogSoftmax(utt.InferShapeTester): ...@@ -258,7 +264,7 @@ class T_LogSoftmax(utt.InferShapeTester):
logsm = tensor.log(sm) logsm = tensor.log(sm)
return logsm return logsm
# We set step to 0.1 because for big values we need a big epsilon # We set step to 0.1 because for big values we need a big epsilon
utt.verify_grad(myfunc, [a], eps=0.1) utt.verify_grad(myfunc, [a], eps=0.1, mode=m)
class T_SoftmaxGrad(utt.InferShapeTester): class T_SoftmaxGrad(utt.InferShapeTester):
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请先注册或者登录后发表评论