Commit eb59a1f9, authored by Pascal Lamblin, committed by GitHub

Merge pull request #5758 from jeffdonahue/sigmoid-bce

Add sigmoid_binary_crossentropy function
......@@ -21,6 +21,7 @@
- :func:`softsign`
- :func:`relu() <theano.tensor.nnet.relu>`
- :func:`binary_crossentropy`
- :func:`sigmoid_binary_crossentropy`
- :func:`.categorical_crossentropy`
- :func:`h_softmax() <theano.tensor.nnet.h_softmax>`
- :func:`confusion_matrix <theano.tensor.nnet.confusion_matrix>`
......@@ -171,6 +172,37 @@
x_recons = T.nnet.sigmoid(T.dot(V, h) + c)
recon_cost = T.nnet.binary_crossentropy(x_recons, x).mean()
.. function:: sigmoid_binary_crossentropy(output,target)
Computes the binary cross-entropy between a target and the sigmoid of an output:
:Parameters:
* *target* - symbolic Tensor (or compatible)
* *output* - symbolic Tensor (or compatible)
:Return type: same as target
:Returns: a symbolic tensor, where the following is applied elementwise :math:`\operatorname{crossentropy}(t,o) = -(t \cdot \log(\operatorname{sigmoid}(o)) + (1 - t) \cdot \log(1 - \operatorname{sigmoid}(o)))`.
It is equivalent to `binary_crossentropy(sigmoid(output), target)`,
but with more efficient and numerically stable computation, especially when
taking gradients.
The following block implements a simple auto-associator with a
sigmoid nonlinearity and a reconstruction error which corresponds
to the binary cross-entropy (note that this assumes that x will
contain values between 0 and 1):
.. testcode::
x, y, b, c = T.dvectors('x', 'y', 'b', 'c')
W = T.dmatrix('W')
V = T.dmatrix('V')
h = T.nnet.sigmoid(T.dot(W, x) + b)
x_precons = T.dot(V, h) + c
# final reconstructions are given by sigmoid(x_precons), but we leave
# them unnormalized as sigmoid_binary_crossentropy applies sigmoid
recon_cost = T.nnet.sigmoid_binary_crossentropy(x_precons, x).mean()
.. function:: categorical_crossentropy(coding_dist,true_dist)
Return the cross-entropy between an approximating distribution and a true distribution.
......
......@@ -4,7 +4,8 @@ from .nnet import (
CrossentropySoftmax1HotWithBiasDx, CrossentropySoftmaxArgmax1HotWithBias,
LogSoftmax, Prepend_scalar_constant_to_each_row,
Prepend_scalar_to_each_row, Softmax,
SoftmaxGrad, SoftmaxWithBias, binary_crossentropy,
SoftmaxGrad, SoftmaxWithBias,
binary_crossentropy, sigmoid_binary_crossentropy,
categorical_crossentropy, crossentropy_categorical_1hot,
crossentropy_categorical_1hot_grad, crossentropy_softmax_1hot,
crossentropy_softmax_1hot_with_bias,
......
......@@ -2017,6 +2017,31 @@ def binary_crossentropy(output, target):
return -(target * tensor.log(output) + (1.0 - target) * tensor.log(1.0 - output))
def sigmoid_binary_crossentropy(output, target):
    """
    Compute the binary cross-entropy of `target` against `sigmoid(output)`.

    `output` should be real-valued (range (-inf, +inf)); the sigmoid is
    applied internally to map it into a (0, 1) valued input.

    `target` is assumed to be probabilities in [0, 1].

    Notes
    -----
    Mathematically equivalent to `binary_crossentropy(sigmoid(output), target)`,
    but with more efficient and numerically stable computation.
    """
    def _grad(inputs, out_grads):
        # Analytic gradients of the stable form below:
        # d/dx = sigmoid(x) - t ;  d/dt = -x
        x, t = inputs
        (g_out,) = out_grads
        return [g_out * (sigmoid(x) - t), g_out * (-x)]

    # Stable closed form: softplus(-|x|) + x * (1[x > 0] - t)
    # equals -(t*log(sigmoid(x)) + (1 - t)*log(1 - sigmoid(x)))
    # without overflow in exp() for large |x|.
    stable_ce = softplus(-abs(output)) + output * ((output > 0) - target)
    op = theano.OpFromGraph([output, target], [stable_ce],
                            grad_overrides=_grad, inline=True,
                            name='sigmoid_binary_crossentropy')
    return op(output, target)
def categorical_crossentropy(coding_dist, true_dist):
"""
Return the cross-entropy between an approximating distribution and a true
......
......@@ -33,6 +33,7 @@ from theano.tensor.nnet import (categorical_crossentropy,
h_softmax,
elu,
binary_crossentropy,
sigmoid_binary_crossentropy,
confusion_matrix)
from theano.tensor import matrix, vector, lvector, scalar
from theano.tensor.nnet.nnet import softsign
......@@ -1768,6 +1769,36 @@ SoftsignTester = makeBroadcastTester(
)
class T_sigmoid_binary_crossentropy(unittest.TestCase):
    """Tests for the numerically stable sigmoid + binary cross-entropy."""

    def setUp(self):
        utt.seed_rng()

    def _get_test_inputs(self, n=50):
        # Draw both arrays from a single randn call; only the target is
        # squashed through a sigmoid into (0, 1), pred stays unbounded.
        pred, target = numpy.random.randn(2, n).astype(config.floatX)
        squashed_target = 1 / (1 + numpy.exp(-target))
        return [pred, squashed_target]

    def test_matches_binary_crossentropy(self):
        """
        Test sigmoid_binary_crossentropy(p, t) ==
        binary_crossentropy(sigmoid(p), t).
        """
        pred, target = inputs = tensor.vectors('pt')

        f_reference = theano.function(
            inputs, binary_crossentropy(sigmoid(pred), target))
        f_test = theano.function(
            inputs, sigmoid_binary_crossentropy(pred, target))

        sample = self._get_test_inputs()
        utt.assert_allclose(f_reference(*sample), f_test(*sample))

    def test_grad(self):
        utt.verify_grad(sigmoid_binary_crossentropy, self._get_test_inputs())
def test_confusion_matrix():
# Defining numpy implementation of confusion matrix
def numpy_conf_mat(actual, pred):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论