Commit eb59a1f9, authored by Pascal Lamblin, committed by GitHub

Merge pull request #5758 from jeffdonahue/sigmoid-bce

Add sigmoid_binary_crossentropy function
......@@ -21,6 +21,7 @@
- :func:`softsign`
- :func:`relu() <theano.tensor.nnet.relu>`
- :func:`binary_crossentropy`
- :func:`sigmoid_binary_crossentropy`
- :func:`.categorical_crossentropy`
- :func:`h_softmax() <theano.tensor.nnet.h_softmax>`
- :func:`confusion_matrix <theano.tensor.nnet.confusion_matrix>`
......@@ -171,6 +172,37 @@
x_recons = T.nnet.sigmoid(T.dot(V, h) + c)
recon_cost = T.nnet.binary_crossentropy(x_recons, x).mean()
.. function:: sigmoid_binary_crossentropy(output,target)
Computes the binary cross-entropy between a target and the sigmoid of an output:
:Parameters:
* *target* - symbolic Tensor (or compatible)
* *output* - symbolic Tensor (or compatible)
:Return type: same as target
:Returns: a symbolic tensor, where the following is applied elementwise :math:`\operatorname{crossentropy}(t,o) = -(t \cdot \log(\operatorname{sigmoid}(o)) + (1 - t) \cdot \log(1 - \operatorname{sigmoid}(o)))`.
It is equivalent to `binary_crossentropy(sigmoid(output), target)`,
but with more efficient and numerically stable computation, especially when
taking gradients.
The following block implements a simple auto-associator with a
sigmoid nonlinearity and a reconstruction error which corresponds
to the binary cross-entropy (note that this assumes that x will
contain values between 0 and 1):
.. testcode::
x, y, b, c = T.dvectors('x', 'y', 'b', 'c')
W = T.dmatrix('W')
V = T.dmatrix('V')
h = T.nnet.sigmoid(T.dot(W, x) + b)
x_precons = T.dot(V, h) + c
# final reconstructions are given by sigmoid(x_precons), but we leave
# them unnormalized as sigmoid_binary_crossentropy applies sigmoid
recon_cost = T.nnet.sigmoid_binary_crossentropy(x_precons, x).mean()
.. function:: categorical_crossentropy(coding_dist,true_dist)
Return the cross-entropy between an approximating distribution and a true distribution.
......
......@@ -4,7 +4,8 @@ from .nnet import (
CrossentropySoftmax1HotWithBiasDx, CrossentropySoftmaxArgmax1HotWithBias,
LogSoftmax, Prepend_scalar_constant_to_each_row,
Prepend_scalar_to_each_row, Softmax,
SoftmaxGrad, SoftmaxWithBias, binary_crossentropy,
SoftmaxGrad, SoftmaxWithBias,
binary_crossentropy, sigmoid_binary_crossentropy,
categorical_crossentropy, crossentropy_categorical_1hot,
crossentropy_categorical_1hot_grad, crossentropy_softmax_1hot,
crossentropy_softmax_1hot_with_bias,
......
......@@ -2017,6 +2017,31 @@ def binary_crossentropy(output, target):
return -(target * tensor.log(output) + (1.0 - target) * tensor.log(1.0 - output))
def sigmoid_binary_crossentropy(output, target):
    """
    Compute the binary cross-entropy of `target` against `sigmoid(output)`.

    `output` should be real-valued (range (-inf, +inf)); the sigmoid is
    applied internally to map it into a (0, 1) valued input.

    `target` is assumed to be probabilities in [0, 1].

    Notes
    -----
    Mathematically equivalent to `binary_crossentropy(sigmoid(output), target)`,
    but with more efficient and numerically stable computation.
    """
    def _grad(inputs, out_grads):
        # Analytic gradients of the stable form below:
        # d/dx = sigmoid(x) - t ;  d/dt = -x
        x, t = inputs
        (g_out,) = out_grads
        return [g_out * (sigmoid(x) - t), g_out * (-x)]

    # Stable closed form: softplus(-|x|) + x * (1[x > 0] - t)
    # equals -(t*log(sigmoid(x)) + (1 - t)*log(1 - sigmoid(x)))
    # without overflow in exp() for large |x|.
    stable_ce = softplus(-abs(output)) + output * ((output > 0) - target)
    op = theano.OpFromGraph([output, target], [stable_ce],
                            grad_overrides=_grad, inline=True,
                            name='sigmoid_binary_crossentropy')
    return op(output, target)
def categorical_crossentropy(coding_dist, true_dist):
"""
Return the cross-entropy between an approximating distribution and a true
......
......@@ -33,6 +33,7 @@ from theano.tensor.nnet import (categorical_crossentropy,
h_softmax,
elu,
binary_crossentropy,
sigmoid_binary_crossentropy,
confusion_matrix)
from theano.tensor import matrix, vector, lvector, scalar
from theano.tensor.nnet.nnet import softsign
......@@ -1768,6 +1769,36 @@ SoftsignTester = makeBroadcastTester(
)
class T_sigmoid_binary_crossentropy(unittest.TestCase):
    """Tests for the numerically stable sigmoid + binary cross-entropy."""

    def setUp(self):
        utt.seed_rng()

    def _get_test_inputs(self, n=50):
        # Draw both arrays from a single randn call; only the target is
        # squashed through a sigmoid into (0, 1), pred stays unbounded.
        pred, target = numpy.random.randn(2, n).astype(config.floatX)
        squashed_target = 1 / (1 + numpy.exp(-target))
        return [pred, squashed_target]

    def test_matches_binary_crossentropy(self):
        """
        Test sigmoid_binary_crossentropy(p, t) ==
        binary_crossentropy(sigmoid(p), t).
        """
        pred, target = inputs = tensor.vectors('pt')

        f_reference = theano.function(
            inputs, binary_crossentropy(sigmoid(pred), target))
        f_test = theano.function(
            inputs, sigmoid_binary_crossentropy(pred, target))

        sample = self._get_test_inputs()
        utt.assert_allclose(f_reference(*sample), f_test(*sample))

    def test_grad(self):
        utt.verify_grad(sigmoid_binary_crossentropy, self._get_test_inputs())
def test_confusion_matrix():
# Defining numpy implementation of confusion matrix
def numpy_conf_mat(actual, pred):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论