提交 f57b7b77 authored 作者: nouiz's avatar nouiz

Merge pull request #594 from lamblin/log1msigm

Add MergeOptimizer in canonicalize phase
......@@ -520,24 +520,45 @@ import theano and print the config variable, as in:
Bool value, default: False
Should each SeqOptimizer object print the time taken by each of its
optimizer. Each SeqOptimizer prints something like this:
optimizer. Each SeqOptimizer prints something like this::
SeqOptimizer gpu_opt time 0.014s for 8/9 nodes before/after optimization
[(0.0004410743713378906, ('InputToGpuOptimizer',
'InputToGpuOptimizer')), (0.0013730525970458984,
('gpu_cut_transfers', 'EquilibriumOptimizer')),
(0.012573957443237305, ('gpu_local_optimizations',
'EquilibriumOptimizer'))]
SeqOptimizer gpu_opt time 0.014s for 8/9 nodes before/after optimization
0.012573s - ('gpu_local_optimizations', 'EquilibriumOptimizer')
0.001373s - ('gpu_cut_transfers', 'EquilibriumOptimizer')
0.000441s - ('InputToGpuOptimizer', 'InputToGpuOptimizer')
This prints the name of the SeqOptimizer (gpu_opt), the number of
Apply nodes in the graph before (8) and after (9)
optimizations. Then a list of tuples with 1 tuple per optimization
in this SeqOptimizer. The first element of the tuple is the time
optimizations. Then a list of lines, one per optimization
in this SeqOptimizer. The first element is the time
taken by this optimization, followed by the name of the
optimization and its class. This list is sorted from the
sub-optimization that takes the most time to the one that takes
the least time.
.. attribute:: config.time_eq_optimizer
Bool value, default: False
Should each EquilibriumOptimizer print the time taken by each of its
iterations, the total number of times each of its optimizers is applied,
and information about the total number of nodes in the graph.
Here is an example of output::
EquilibriumOptimizer specialize
time 4.760s for 4 passes, 3801 nodes max
0 - 1.961s (0.079s in global opts) - 3797 nodes
1 - 1.233s (0.080s in global opts) - 3801 nodes
2 - 0.857s (0.071s in global opts) - 3203 nodes
3 - 0.710s (0.066s in global opts) - 3095 nodes
times applied - optimizer:
384 - dimshuffle_as_view
262 - constant_folding
216 - local_subtensor_make_vector
216 - local_shape_to_shape_i
4 - local_mul_specialize
.. attribute:: config.cmodule.warn_no_version
Bool value, default: False
......
......@@ -29,6 +29,11 @@ AddConfigVar('time_seq_optimizer',
BoolParam(False),
in_c_key=False)
AddConfigVar('time_eq_optimizer',
"Should EquilibriumOptimizer print the time taken by each optimizer",
BoolParam(False),
in_c_key=False)
import destroyhandler as dh
import traceback
......@@ -174,7 +179,10 @@ class SeqOptimizer(Optimizer, list):
if a[0]<b[0]: return -1
return 1
lll.sort(cmp)
print lll
for (t, opt) in lll[::-1]:
print ' %.6fs - %s' % (t, opt)
print
def __eq__(self, other):
#added to override the list's __eq__ implementation
......@@ -1115,7 +1123,12 @@ class EquilibriumOptimizer(NavigatorOptimizer):
process_count = {}
max_nb_nodes = 0
loop_timing = []
global_opt_timing = []
nb_nodes = []
while changed and not max_use_abort:
t0 = time.time()
changed = False
#apply global optimizer
......@@ -1125,12 +1138,15 @@ class EquilibriumOptimizer(NavigatorOptimizer):
if env.change_tracker.changed:
changed = True
global_opt_timing.append(float(time.time() - t0))
#apply local optimizer
for node in start_from:
assert node in env.outputs
q = deque(graph.io_toposort(env.inputs, start_from))
nb_nodes.append(len(q))
max_nb_nodes = max(max_nb_nodes, len(q))
max_use = max_nb_nodes * self.max_use_ratio
def importer(node):
......@@ -1163,12 +1179,37 @@ class EquilibriumOptimizer(NavigatorOptimizer):
finally:
self.detach_updater(env, u)
self.detach_updater(env, u) #TODO: erase this line, it's redundant at best
loop_timing.append(float(time.time() - t0))
if max_use_abort:
_logger.error("EquilibriumOptimizer max'ed out by '%s'" % opt_name
+ ". You can safely raise the current threshold of "
+ "%f with the theano flag 'optdb.max_use_ratio'." %
config.optdb.max_use_ratio)
if config.time_eq_optimizer:
print "EquilibriumOptimizer",
print getattr(self, "name", getattr(self, "__name__", ""))
print " time %.3fs for %d passes, %d nodes max" % (
sum(loop_timing), len(loop_timing), max_nb_nodes)
for i in range(len(loop_timing)):
print '%d - %.3fs (%.3fs in global opts) - %d nodes' % (
i, loop_timing[i], global_opt_timing[i], nb_nodes[i])
print
count_opt = []
for opt, count in process_count.iteritems():
if count > 0:
count_opt.append((count, opt))
if count_opt:
print 'times applied - optimizer:'
count_opt.sort()
for (count, opt) in count_opt[::-1]:
print ' %d - %s' % (count, opt)
print
def print_summary(self, stream=sys.stdout, level=0, depth=-1):
name = getattr(self, 'name', None)
print >> stream, "%s%s %s id=%i" %(' '*level, self.__class__.__name__, name, id(self))
......
......@@ -231,6 +231,26 @@ class T_sigmoid_opts(unittest.TestCase):
ok(-exp(x) * -sigmoid(-x) * -exp(-x),
-sigmoid(-x))
def test_grad_log1msigm(self):
    # Regression test: the gradient of mean(log(1 - sigmoid(x)))
    # used to evaluate to NaN, because the expression (1 - sigm(x))
    # appeared in both the numerator and the denominator of a
    # fraction as two distinct, unmerged graph nodes.
    inp = tensor.matrix('x')
    step = tensor.scalar('lr')
    cost = T.log(1 - sigmoid(inp)).mean()
    updated = inp - step * theano.grad(cost, inp)
    # Before the merge optimization runs, the graph holds inf/NaN
    # intermediates that DebugMode rejects, so skip under DebugMode;
    # everything is fine once the optimization has been applied.
    mode = self.get_mode()
    if not isinstance(mode, theano.compile.DebugMode):
        fn = theano.function([inp, step], updated)
        result = fn([[50]], 0.1)
        assert not numpy.isnan(result)
class T_softplus_opts(unittest.TestCase):
def setUp(self):
......
......@@ -31,6 +31,7 @@ from theano import compile # to register the optimizer built by this file
from theano.gof.python25 import any, all
from theano.gof.opt import (Optimizer, pre_constant_merge,
pre_greedy_local_optimizer)
from theano.gof.opt import merge_optimizer
from theano.gof import toolbox, DestroyHandler
from basic import get_constant_value, ShapeError
......@@ -301,6 +302,11 @@ def register_specialize_device(lopt, *tags, **kwargs):
return lopt
## Register merge_optimizer as a global opt during canonicalize
compile.optdb['canonicalize'].register(
'canon_merge', merge_optimizer, 'fast_run')
#####################
# Dot optimizations #
#####################
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论