Merge pull request #1391 from nouiz/sigmoid

Add ultra_fast_sigmoid

Merge pull request #1391 from nouiz/sigmoid
00fbaf1d · lamblin · 62270d2f · d5c0893a · 00fbaf1d · 00fbaf1d
--- a/doc/library/tensor/nnet/nnet.txt
+++ b/doc/library/tensor/nnet/nnet.txt
@@ -15,6 +15,7 @@
    :Parameters: *x* - symbolic Tensor (or compatible)
    :Return type: same as x
    :Returns: element-wise sigmoid: :math:`sigmoid(x) = \frac{1}{1 + \exp(-x)}`.
+    :note: see :func:`ultra_fast_sigmoid` for a faster, approximate version.

 Example:

@@ -26,6 +27,18 @@ Example:

 .. note:: The underlying code will return an exact 0 or 1 if an element of x is too small or too big.

+.. function:: ultra_fast_sigmoid(x)
+
+   Returns an approximation of the standard sigmoid nonlinearity applied to x
+    :Parameters: *x* - symbolic Tensor (or compatible)
+    :Return type: same as x
+    :Returns: approximated element-wise sigmoid: :math:`sigmoid(x) = \frac{1}{1 + \exp(-x)}`.
+    :note: To automatically replace all sigmoid ops with this version, use
+      the Theano optimization ``local_ultra_fast_sigmoid``. This can be done
+      with the Theano flag ``optimizer_including=local_ultra_fast_sigmoid``.
+      This optimization is applied late, so it should not interfere with
+      the stabilization optimizations.
+
 .. function:: softplus(x)

   Returns the softplus nonlinearity applied to x

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -922,7 +922,8 @@ class TensorType(Type):
            return False

    @staticmethod
-    def values_eq_approx(a, b, allow_remove_inf=False, allow_remove_nan=False):
+    def values_eq_approx(a, b, allow_remove_inf=False, allow_remove_nan=False,
+                         rtol=None, atol=None):
        """
        :param allow_remove_inf: If True, when there is an inf in a,
                                 we allow any value in b in that position.
@@ -930,6 +931,8 @@ class TensorType(Type):
        :param allow_remove_nan: If True, when there is a nan in a,
                                 we allow any value in b in that position.
                                 Event +-inf
+        :param rtol: relative tolerance, passed to _allclose
+        :param atol: absolute tolerance, passed to _allclose
        """
        if isinstance(a, numpy.ndarray) and isinstance(b, numpy.ndarray):
            if a.shape != b.shape:
@@ -945,7 +948,7 @@ class TensorType(Type):
                    a = a.reshape(1)
                    b = b.reshape(1)

-                cmp = _allclose(a, b)
+                cmp = _allclose(a, b, rtol=rtol, atol=atol)
                if cmp:
                    # Numpy claims they are close, this is good enough for us.
                    return True

--- a/theano/tensor/nnet/__init__.py
+++ b/theano/tensor/nnet/__init__.py
@@ -3,4 +3,5 @@ from conv import conv2d, ConvOp
 from Conv3D import *
 from ConvGrad3D import *
 from ConvTransp3D import *
-from sigm import softplus, sigmoid, sigmoid_inplace, scalar_sigmoid
+from sigm import (softplus, sigmoid, sigmoid_inplace,
+                  scalar_sigmoid, ultra_fast_sigmoid)
--- a/theano/tensor/nnet/sigm.py
+++ b/theano/tensor/nnet/sigm.py
@@ -10,9 +10,8 @@ import numpy

 import theano
 from theano import config, gof, printing, scalar
-from theano.compile import optdb
 from theano.configparser import AddConfigVar, BoolParam
-from theano.printing import pprint, debugprint
+from theano.printing import pprint
 from theano.tensor import basic as tensor
 from theano.tensor import elemwise, opt, NotScalarConstantError

@@ -50,11 +49,19 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
    def c_code(self, node, name, inp, out, sub):
        x, = inp
        z, = out
+        # We add boundary checks to prevent exp from generating inf or
+        # 0. The rest of the logic always generates 0 or 1 in those
+        # cases. This is a speed optimization.
+        # The constants were obtained by looking at the output of python commands like:
+        """
+import numpy, theano
+dt='float32'  # or float64
+for i in xrange(750):
+    print i, repr(theano._asarray(1.0, dtype=dt) /
+                              (theano._asarray(1.0, dtype=dt) +
+                              numpy.exp(-theano._asarray([i,-i], dtype=dt))))
+        """
        if node.inputs[0].type == scalar.float32:
-            # These constants were obtained by looking at the output of python commands like:
-            #  for i in xrange(750):
-            #      print i, repr( theano._asarray(1.0, dtype=dt) / (theano._asarray(1.0, dtype=dt) + numpy.exp(-theano._asarray([i,-i], dtype=dt))))
-            # the boundary checks prevent us from generating inf
            return """%(z)s = %(x)s < -88.0f ? 0.0 : %(x)s > 15.0f ? 1.0f : 1.0f /(1.0f + exp(-%(x)s));""" % locals()
        elif node.inputs[0].type == scalar.float64:
            return """%(z)s = %(x)s < -709.0 ? 0.0 : %(x)s > 19.0 ? 1.0 : 1.0 /(1.0+exp(-%(x)s));""" % locals()
@@ -117,6 +124,105 @@ sigmoid_inplace = elemwise.Elemwise(
 pprint.assign(sigmoid, printing.FunctionPrinter('sigmoid'))


+class UltraFastScalarSigmoid(scalar.UnaryScalarOp):
+    """
+    Fast approximation of the scalar sigmoid: a speed optimization only, not a stability one.
+    """
+    @staticmethod
+    def st_impl(x):
+        x = 0.5 * x
+        # Piecewise approximation of tanh(x); the sigmoid is 0.5 * (tanh + 1).
+        if x >= 0:
+            if x < 1.7:
+                z = (1.5 * x / (1 + x))
+            elif x < 3:
+                z = (0.935409070603099 + 0.0458812946797165 * (x - 1.7))
+            else:
+                z = 0.99505475368673
+        else:
+            xx = -x
+            if xx < 1.7:
+                z = (1.5 * xx / (1 + xx))
+            elif xx < 3:
+                z = (0.935409070603099 + 0.0458812946797165 * (xx - 1.7))
+            else:
+                z = 0.99505475368673
+            z = -z
+
+        return 0.5 * (z + 1.)
+
+    def impl(self, x):
+        return UltraFastScalarSigmoid.st_impl(x)
+
+    def c_code(self, node, name, inp, out, sub):
+        x, = inp
+        z, = out
+        dtype = node.outputs[0].type.dtype_specs()[1]
+
+        return """
+        %(dtype)s x = 0.5 * %(x)s;
+   // The if is a tanh approximate.
+   if(x>=0) {
+        %(z)s = (x<1.7 ? (1.5*x/(1+x)) :
+                         (x<3 ? (0.935409070603099 + 0.0458812946797165*(x-1.7)):
+                         0.99505475368673));
+    } else {
+        %(dtype)s xx = -x;
+        %(z)s = -(xx<1.7 ? (1.5*xx/(1+xx)) :
+                           (xx<3 ? (0.935409070603099 + 0.0458812946797165*(xx-1.7)):
+                                   0.99505475368673));
+    }
+
+        //%(z)s = 0.5*(ultrafasttanh(0.5*x)+1.);
+        %(z)s = 0.5*(%(z)s+1.);
+        """ % locals()
+
+ultra_fast_scalar_sigmoid = UltraFastScalarSigmoid(
+    scalar.upgrade_to_float, name='ultra_fast_scalar_sigmoid')
+ultra_fast_sigmoid = elemwise.Elemwise(ultra_fast_scalar_sigmoid,
+                                       name='ultra_fast_sigmoid')
+
+ultra_fast_sigmoid_inplace = elemwise.Elemwise(
+    UltraFastScalarSigmoid(scalar.transfer_type(0)),
+    inplace_pattern={0: 0},
+    name='ultra_fast_sigmoid_inplace',
+)
+
+pprint.assign(ultra_fast_sigmoid,
+              printing.FunctionPrinter('ultra_fast_sigmoid'))
+
+
+#@opt.register_uncanonicalize
+@gof.local_optimizer([sigmoid])
+def local_ultra_fast_sigmoid(node):
+    """
+    When enabled, replace every sigmoid by ultra_fast_sigmoid.
+
+    For example, use mode.including('local_ultra_fast_sigmoid')
+    or the Theano flag optimizer_including=local_ultra_fast_sigmoid.
+
+    This speeds up the sigmoid op by using an approximation.
+
+    This is done after the stabilization and specialize phases,
+    so as not to interact with them.
+
+    """
+    if (isinstance(node.op, tensor.Elemwise) and
+            node.op.scalar_op == scalar_sigmoid):
+        # Build the approximate replacement for this sigmoid node.
+        out = ultra_fast_sigmoid(node.inputs[0])
+
+        def values_eq_approx_remove_low_prec(a, b):
+            # atol was found by trial and error.
+            # Without it, other tests could fail for no good reason.
+            return tensor.TensorType.values_eq_approx(a, b, atol=0.02)
+        # Let DebugMode know that this optimization approximates the values.
+        out.values_eq_approx = values_eq_approx_remove_low_prec
+        return [out]
+theano.compile.optdb['uncanonicalize'].register("local_ultra_fast_sigmoid",
+                                                local_ultra_fast_sigmoid)
+
+
 class ScalarSoftplus(scalar.UnaryScalarOp):
    @staticmethod
    def static_impl(x):

--- a/theano/tensor/nnet/tests/test_sigm.py
+++ b/theano/tensor/nnet/tests/test_sigm.py
@@ -4,13 +4,19 @@ from itertools import imap
 import numpy

 import theano.tensor.inplace
+from theano.tensor import basic as tensor
 from theano import tensor as T
 from theano import config
 from theano.tests import unittest_tools as utt
-from theano.tensor.nnet import sigmoid, sigmoid_inplace, softplus, tensor
+from theano.tensor.nnet import (sigmoid, sigmoid_inplace,
+                                softplus, ultra_fast_sigmoid)
 from theano.tensor.nnet.sigm import (
    compute_mul, is_1pexp, parse_mul_tree, perform_sigm_times_exp,
-        register_local_1msigmoid, simplify_mul)
+    register_local_1msigmoid, simplify_mul,
+)
+from theano.tensor.tests.test_basic import (makeBroadcastTester, rand,
+                                            check_floatX,
+                                            _good_broadcast_unary_normal_no_complex)


 class T_sigmoid(unittest.TestCase):
@@ -21,6 +27,36 @@ class T_sigmoid(unittest.TestCase):
        utt.verify_grad(sigmoid, [numpy.random.rand(3, 4)])


+SigmoidTester = makeBroadcastTester(
+    op=sigmoid,
+    expected=lambda inputs: check_floatX(
+        inputs, 1/(1+numpy.exp(-inputs))),
+    good=_good_broadcast_unary_normal_no_complex,
+    #grad=_grad_broadcast_unary_normal,
+    name='SigmoidTester',
+)
+
+UltraFastSigmoidTester = makeBroadcastTester(
+    op=ultra_fast_sigmoid,
+    expected=lambda inputs: check_floatX(
+        inputs, 1/(1+numpy.exp(-inputs))),
+    good=_good_broadcast_unary_normal_no_complex,
+    #grad=_grad_broadcast_unary_normal,
+    name='UltraFastSigmoidTester',
+    # This is an approximation of the sigmoid, which is why we raise eps.
+    eps=5e-2)
+
+
+SoftplusTester = makeBroadcastTester(
+    op=softplus,
+    expected=lambda inputs: check_floatX(
+        inputs, numpy.log1p(numpy.exp(inputs))),
+    good=_good_broadcast_unary_normal_no_complex,
+    #grad=_grad_broadcast_unary_normal,
+    name='SoftplusTester',
+)
+
+
 class T_softplus(unittest.TestCase):
    def setUp(self):
        utt.seed_rng()
@@ -253,6 +289,19 @@ class T_sigmoid_opts(unittest.TestCase):
            ux_v = f([[50]], 0.1)
            assert not numpy.isnan(ux_v)

+    def test_local_ultra_fast_sigmoid(self):
+        x = tensor.matrix('x')
+        s = sigmoid(x)
+        # NOTE(review): get_mode's argument is presumably excluded - confirm.
+        mode = self.get_mode('local_ultra_fast_sigmoid')
+        f = theano.function([x], s, mode=mode)
+        assert f.maker.fgraph.toposort()[0].op == sigmoid
+        # Once the optimization is included, sigmoid must be replaced.
+        mode = self.get_mode().including('local_ultra_fast_sigmoid')
+        f = theano.function([x], s, mode=mode)
+        assert f.maker.fgraph.toposort()[0].op == ultra_fast_sigmoid
+        ux_v = f([[-50, -10, -4, -1, 0, 1, 4, 10, 50]])
+

 class T_softplus_opts(unittest.TestCase):
    def setUp(self):

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -219,7 +219,8 @@ def makeTester(name, op, expected, checks=None, good=None, bad_build=None,

        def setUp(self):
            # Verify that the test's name is correctly set.
-            assert eval(self.__class__.__name__) is self.__class__
+            # Some tests reuse it outside this module.
+            eval(self.__class__.__module__ + '.' + self.__class__.__name__)

            # We keep a list of temporary files created in add_memmap_values,
            # to remove them at the end of the test.