Merge pull request #3048 from f0k/relu-function

Add theano.tensor.nnet.relu function

Merge pull request #3048 from f0k/relu-function
a1bee35d · Frédéric Bastien · 4ef14003 · ea498bf5 · a1bee35d · a1bee35d
--- a/doc/library/tensor/nnet/nnet.txt
+++ b/doc/library/tensor/nnet/nnet.txt
@@ -18,6 +18,7 @@
 - Others
   - :func:`softplus`
   - :func:`softmax`
+   - :func:`relu() <theano.tensor.nnet.relu>`
   - :func:`binary_crossentropy`
   - :func:`.categorical_crossentropy`

@@ -136,6 +137,8 @@
       W = T.dmatrix('W')
       y = T.nnet.softmax(T.dot(W,x) + b)

+.. autofunction:: theano.tensor.nnet.relu
+
 .. function:: binary_crossentropy(output,target)

   Computes the binary cross-entropy between a target and an output:

--- a/theano/tensor/nnet/nnet.py
+++ b/theano/tensor/nnet/nnet.py
@@ -2061,3 +2061,38 @@ local_log_softmax = gof.PatternSub(in_pattern=(tensor.log, (softmax, 'x')),
 # only after another more specific optimization that stabilizes cross entropy
 #opt.register_stabilize(local_log_softmax, name = 'local_log_softmax')
 opt.register_specialize(local_log_softmax, 'fast_compile_gpu', name='local_log_softmax')
+
+
+def relu(x, alpha=0):
+    """
+    Compute the element-wise rectified linear activation function.
+
+    :type x: symbolic tensor
+    :param x: Tensor to compute the activation function for.
+
+    :type alpha: scalar or tensor, optional
+    :param alpha: Slope for negative input, usually between 0 and 1. The
+        default value of 0 will lead to the standard rectifier, 1 will lead to
+        a linear activation function, and any value in between will give a
+        leaky rectifier. A shared variable (broadcastable against `x`) will
+        result in a parameterized rectifier with learnable slope(s).
+
+    :rtype: symbolic tensor
+    :return: element-wise rectifier applied to `x`
+
+    .. note:: This is numerically equivalent to
+        ``T.switch(x > 0, x, alpha * x)``
+        (or ``T.maximum(x, alpha * x)`` for ``alpha <= 1``), but uses a faster
+        formulation or an optimized Op, so its use is encouraged.
+
+    """
+    # This is probably the fastest implementation for GPUs. Both the forward
+    # pass and the gradient get compiled into a single GpuElemwise call.
+    # TODO: Check if it's optimal for CPU as well; add an "if" clause if not.
+    # TODO: Check if there's a faster way for the gradient; create an Op if so.
+    if alpha == 0:
+        return 0.5 * (x + abs(x))
+    else:
+        f1 = 0.5 * (1 + alpha)
+        f2 = 0.5 * (1 - alpha)
+        return f1 * x + f2 * abs(x)
--- a/theano/tensor/nnet/tests/test_nnet.py
+++ b/theano/tensor/nnet/tests/test_nnet.py
@@ -26,7 +26,8 @@ from theano.tensor.nnet import (categorical_crossentropy,
                                softmax_grad,
                                softmax_with_bias, SoftmaxGrad,
                                Prepend_scalar_constant_to_each_row,
-                                Prepend_scalar_to_each_row)
+                                Prepend_scalar_to_each_row,
+                                relu)
 from theano.tensor import matrix, vector, lvector, scalar


@@ -1394,5 +1395,30 @@ def test_stabilize_log_softmax():
    rng = numpy.random.RandomState([2012, 8, 22])
    f(numpy.cast[config.floatX](rng.randn(2, 3)))

+
+def test_relu():
+    x = matrix('x')
+    seed = theano.tests.unittest_tools.fetch_seed()
+    rng = numpy.random.RandomState(seed)
+    X = rng.randn(20, 30).astype(config.floatX)
+
+    # test the base case, without custom alpha value
+    y = theano.tensor.nnet.relu(x).eval({x: X})
+    assert numpy.allclose(y, numpy.maximum(X, 0))
+
+    # test for different constant alpha values (also outside of [0, 1])
+    for alpha in 0, 0.3, 1, 2, -0.3, -1, -2:
+        y = theano.tensor.nnet.relu(x, alpha).eval({x: X})
+        assert numpy.allclose(y, numpy.where(X > 0, X, alpha * X))
+
+    # test for variable alpha (scalar, vector and matrix)
+    for alpha in scalar(), vector(), matrix():
+        # create value for alpha (correct ndim and broadcastable against X)
+        A = numpy.array(rng.randn(*X.shape[::-1][:alpha.ndim][::-1]),
+                        dtype=config.floatX)
+        y = theano.tensor.nnet.relu(x, alpha).eval({x: X, alpha: A})
+        assert numpy.allclose(y, numpy.where(X > 0, X, A * X), rtol=3e-5)
+
+
 if __name__ == '__main__':
    unittest.main()