Commit f05a0c89 authored by fvisin, committed by Francesco Visin

Add LogSoftmax python code and tests

Parent 30cc6380
...@@ -431,6 +431,7 @@ class Softmax(gof.Op): ...@@ -431,6 +431,7 @@ class Softmax(gof.Op):
x.type) x.type)
if x.ndim == 1: if x.ndim == 1:
x = tensor.shape_padleft(x, n_ones=1) x = tensor.shape_padleft(x, n_ones=1)
return Apply(self, [x], [x.type()]) return Apply(self, [x], [x.type()])
def perform(self, node, input_storage, output_storage): def perform(self, node, input_storage, output_storage):
...@@ -599,6 +600,52 @@ class Softmax(gof.Op): ...@@ -599,6 +600,52 @@ class Softmax(gof.Op):
softmax_op = Softmax() softmax_op = Softmax()
class LogSoftmax(gof.Op):
    r"""
    LogSoftmax activation function.

    .. math::

        \varphi(\mathbf{x})_j =
        \mathbf{x}_j - \log \sum_{k=1}^K e^{\mathbf{x}_k}

    where :math:`K` is the total number of neurons in the layer. This
    activation function gets applied row-wise.
    """
    def make_node(self, x):
        # Accept 1-d or 2-d float tensors only; a 1-d input is promoted
        # to a single-row matrix so perform/grad can always reduce over
        # axis=1.
        x = tensor.as_tensor_variable(x)
        if x.type.ndim not in (1, 2) \
                or x.type.dtype not in tensor.float_dtypes:
            raise ValueError('x must be 1-d or 2-d tensor of floats. Got %s' %
                             x.type)
        if x.ndim == 1:
            x = tensor.shape_padleft(x, n_ones=1)
        return Apply(self, [x], [x.type()])

    def perform(self, node, input_storage, output_storage):
        x, = input_storage
        # Shift by the row-wise max so exp() cannot overflow; the shift
        # cancels out in the final log-softmax value.
        xdev = x - x.max(axis=1)[:, None]
        lsm = xdev - numpy.log(numpy.sum(numpy.exp(xdev), axis=1,
                                         keepdims=True))
        output_storage[0][0] = lsm

    def grad(self, inp, grads):
        # Row-wise vector-Jacobian product:
        #   g_out = g - sum(g, axis=1) * softmax(x)
        x, = inp
        sm = softmax_op(x)
        return [grads[0] - tensor.sum(grads[0], axis=1, keepdims=True) * sm]

    def R_op(self, inputs, eval_points):
        # The Jacobian of log-softmax is symmetric, so the R_op is the
        # same expression as the gradient applied to the eval points.
        if None in eval_points:
            return [None]
        return self.grad(inputs, eval_points)

    def infer_shape(self, node, shape):
        # Output shape equals input shape.
        return shape


logsoftmax_op = LogSoftmax()
def softmax_graph(c): def softmax_graph(c):
return tensor.exp(c) / tensor.exp(c).sum(axis=-1, keepdims=True) return tensor.exp(c) / tensor.exp(c).sum(axis=-1, keepdims=True)
...@@ -607,6 +654,10 @@ def softmax(c): ...@@ -607,6 +654,10 @@ def softmax(c):
return softmax_op(c) return softmax_op(c)
def logsoftmax(c):
    """Return the row-wise log-softmax of `c`.

    Numerically stable equivalent of ``log(softmax(c))``: the op
    computes it as ``x - max(x) - log(sum(exp(x - max(x))))`` per row.
    """
    return logsoftmax_op(c)
@opt.register_specialize('fast_compile_gpu') @opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax_op]) @gof.local_optimizer([softmax_op])
def local_softmax_with_bias(node): def local_softmax_with_bias(node):
......
...@@ -24,8 +24,8 @@ from theano.tensor.nnet import (categorical_crossentropy, ...@@ -24,8 +24,8 @@ from theano.tensor.nnet import (categorical_crossentropy,
CrossentropyCategorical1HotGrad, CrossentropyCategorical1HotGrad,
sigmoid, softplus, Softmax, softmax, sigmoid, softplus, Softmax, softmax,
softmax_op, softmax_graph, SoftmaxWithBias, softmax_op, softmax_graph, SoftmaxWithBias,
softmax_grad, softmax_with_bias, LogSoftmax, logsoftmax_op,
softmax_with_bias, SoftmaxGrad, softmax_grad, SoftmaxGrad,
Prepend_scalar_constant_to_each_row, Prepend_scalar_constant_to_each_row,
Prepend_scalar_to_each_row, Prepend_scalar_to_each_row,
relu, relu,
...@@ -122,9 +122,9 @@ class T_SoftmaxWithBias(utt.InferShapeTester): ...@@ -122,9 +122,9 @@ class T_SoftmaxWithBias(utt.InferShapeTester):
# test that we don't raise an error during optimization for no good # test that we don't raise an error during optimization for no good
# reason as softmax_with_bias don't support correctly some/all # reason as softmax_with_bias don't support correctly some/all
# broadcasted inputs pattern # broadcasted inputs pattern
initial_W = numpy.asarray([[0.1, 0.1, 0.1], \ initial_W = numpy.asarray([[0.1, 0.1, 0.1],
[0.1, 0.1, 0.1], \ [0.1, 0.1, 0.1],
[0.1, 0.1, 0.1]], \ [0.1, 0.1, 0.1]],
dtype=theano.config.floatX) dtype=theano.config.floatX)
W = theano.shared(value=initial_W, name='W') W = theano.shared(value=initial_W, name='W')
vbias = theano.shared(value=0.1, name='vbias') # 0.01 vbias = theano.shared(value=0.1, name='vbias') # 0.01
...@@ -148,6 +148,82 @@ class T_SoftmaxWithBias(utt.InferShapeTester): ...@@ -148,6 +148,82 @@ class T_SoftmaxWithBias(utt.InferShapeTester):
[admat_val, advec_val], SoftmaxWithBias) [admat_val, advec_val], SoftmaxWithBias)
class T_LogSoftmax(utt.InferShapeTester):
    """Tests for the LogSoftmax op: per-column gradients, vector input
    handling, and numerical stability versus naive log(softmax(x))."""

    def test0(self):
        # Gradient through column 0 of the log-softmax output.
        def f(a):
            return logsoftmax_op(a)[:, 0]
        utt.verify_grad(f, [numpy.random.rand(3, 4)])

    def test1(self):
        def f(a):
            return logsoftmax_op(a)[:, 1]
        utt.verify_grad(f, [numpy.random.rand(3, 4)])

    def test2(self):
        def f(a):
            return logsoftmax_op(a)[:, 2]
        utt.verify_grad(f, [numpy.random.rand(3, 4)])

    def test3(self):
        def f(a):
            return logsoftmax_op(a)[:, 3]
        utt.verify_grad(f, [numpy.random.rand(3, 4)])

    def test_vector(self):
        # A 1-d input must be accepted and match log(softmax(x)).
        x = T.vector()
        f = theano.function([x], logsoftmax_op(x))
        xv = numpy.random.randn(6).astype(config.floatX)
        assert numpy.allclose(f(xv),
                              numpy.log(numpy.exp(xv) / numpy.exp(xv).sum()))

    def test_vector_grad(self):
        # BUG FIX: this previously exercised softmax_op, not
        # logsoftmax_op, so the vector-input gradient of LogSoftmax was
        # never actually tested.
        def f(a):
            return logsoftmax_op(a)
        utt.verify_grad(f, [numpy.random.rand(4)])

    def test_allclose(self):
        x, y = tensor.matrices('xy')
        # Regular softmax and crossentropy.
        sm = tensor.nnet.softmax(x)
        cm = tensor.nnet.categorical_crossentropy(sm, y)

        # Numerically stable log-softmax with crossentropy.
        logsm = tensor.nnet.logsoftmax(x)
        sm2 = tensor.exp(logsm)  # just used to show equivalence with sm
        cm2 = -tensor.sum(y * logsm, axis=1)
        grad = tensor.grad(cm2.mean(), x)

        # Create some large inputs into a softmax (large enough that the
        # naive formulation saturates) ...
        a = numpy.exp(10 * numpy.random.rand(5, 10).astype(theano.config.floatX))
        # ... and some one-hot coded labels.
        b = numpy.eye(5, 10).astype(theano.config.floatX)

        # Show equivalence of softmax and exponentiated numerically
        # stable log-softmax.
        f1 = theano.function([x], [sm, sm2])
        sm_, sm2_ = f1(a)
        utt.assert_allclose(sm_, sm2_)

        # Show that the two versions give the same crossentropy cost;
        # this indicates the forward function does provide some
        # numerical stability.
        f2 = theano.function([x, y], [cm, cm2])
        cm_, cm2_ = f2(a, b)
        utt.assert_allclose(cm_, cm2_)

        # Finally, show that the log-softmax gradient stays finite on
        # these extreme inputs (the naive softmax gradient would not).
        f3 = theano.function([x, y], [grad])
        grad_ = f3(a, b)
        assert not numpy.any(numpy.isnan(grad_))

    def test_isclose(self):
        # BUG FIX: previously this defined `f` but never executed
        # anything, so the test was a silent no-op.  Check the forward
        # value against a numpy reference.
        x = T.matrix()
        f = theano.function([x], logsoftmax_op(x))
        xv = numpy.random.rand(3, 4).astype(config.floatX)
        ref = numpy.log(numpy.exp(xv) /
                        numpy.exp(xv).sum(axis=1, keepdims=True))
        utt.assert_allclose(f(xv), ref)
class T_SoftmaxGrad(utt.InferShapeTester): class T_SoftmaxGrad(utt.InferShapeTester):
def test_infer_shape(self): def test_infer_shape(self):
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment