提交 f57b7b77 authored 作者: nouiz's avatar nouiz

Merge pull request #594 from lamblin/log1msigm

Add MergeOptimizer in canonicalize phase
...@@ -520,24 +520,45 @@ import theano and print the config variable, as in: ...@@ -520,24 +520,45 @@ import theano and print the config variable, as in:
Bool value, default: False Bool value, default: False
Should each SeqOptimizer object print the time taken by each of its Should each SeqOptimizer object print the time taken by each of its
optimizer. Each SeqOptimizer prints something like this::
SeqOptimizer gpu_opt time 0.014s for 8/9 nodes before/after optimization SeqOptimizer gpu_opt time 0.014s for 8/9 nodes before/after optimization
[(0.0004410743713378906, ('InputToGpuOptimizer', 0.012573s - ('gpu_local_optimizations', 'EquilibriumOptimizer')
'InputToGpuOptimizer')), (0.0013730525970458984, 0.001373s - ('gpu_cut_transfers', 'EquilibriumOptimizer')
('gpu_cut_transfers', 'EquilibriumOptimizer')), 0.000441s - ('InputToGpuOptimizer', 'InputToGpuOptimizer')
(0.012573957443237305, ('gpu_local_optimizations',
'EquilibriumOptimizer'))]
This prints the name of the SeqOptimizer (gpu_opt), the number of This prints the name of the SeqOptimizer (gpu_opt), the number of
Apply nodes in the graph before (8) and after (9) Apply nodes in the graph before (8) and after (9)
optimizations. Then a list of tuples with 1 tuple per optimization optimizations. Then a list of lines, one per optimization
in this SeqOptimizer. The first element of the tuple is the time in this SeqOptimizer. The first element is the time
taken by this optimization and then it is a tuple with the name of the taken by this optimization and then it is a tuple with the name of the
optimization and this class. This list is sorted from the sub optimization and this class. This list is sorted from the sub
optimization that takes the most time to the optimization that takes optimization that takes the most time to the optimization that takes
the least time. the least time.
.. attribute:: config.time_eq_optimizer
Bool value, default: False
Should each EquilibriumOptimizer print the time taken by each of its
iterations, the total number of times each of its optimizers is applied,
and information about the total number of nodes in the graph.
Here is an example of output::
EquilibriumOptimizer specialize
time 4.760s for 4 passes, 3801 nodes max
0 - 1.961s (0.079s in global opts) - 3797 nodes
1 - 1.233s (0.080s in global opts) - 3801 nodes
2 - 0.857s (0.071s in global opts) - 3203 nodes
3 - 0.710s (0.066s in global opts) - 3095 nodes
times applied - optimizer:
384 - dimshuffle_as_view
262 - constant_folding
216 - local_subtensor_make_vector
216 - local_shape_to_shape_i
4 - local_mul_specialize
.. attribute:: config.cmodule.warn_no_version .. attribute:: config.cmodule.warn_no_version
Bool value, default: False Bool value, default: False
......
...@@ -29,6 +29,11 @@ AddConfigVar('time_seq_optimizer', ...@@ -29,6 +29,11 @@ AddConfigVar('time_seq_optimizer',
BoolParam(False), BoolParam(False),
in_c_key=False) in_c_key=False)
# Theano flag 'time_eq_optimizer': when True, each EquilibriumOptimizer
# prints per-pass timing, the time spent in global optimizers, node counts,
# and how many times each sub-optimizer was applied (see opt.py).
# in_c_key=False: this flag only affects logging, so it must not be part of
# the key used to cache compiled C modules.
AddConfigVar('time_eq_optimizer',
"Should EquilibriumOptimizer print the time taken by each optimizer",
BoolParam(False),
in_c_key=False)
import destroyhandler as dh import destroyhandler as dh
import traceback import traceback
...@@ -174,7 +179,10 @@ class SeqOptimizer(Optimizer, list): ...@@ -174,7 +179,10 @@ class SeqOptimizer(Optimizer, list):
if a[0]<b[0]: return -1 if a[0]<b[0]: return -1
return 1 return 1
lll.sort(cmp) lll.sort(cmp)
print lll
for (t, opt) in lll[::-1]:
print ' %.6fs - %s' % (t, opt)
print
def __eq__(self, other): def __eq__(self, other):
#added to override the list's __eq__ implementation #added to override the list's __eq__ implementation
...@@ -1115,7 +1123,12 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1115,7 +1123,12 @@ class EquilibriumOptimizer(NavigatorOptimizer):
process_count = {} process_count = {}
max_nb_nodes = 0 max_nb_nodes = 0
loop_timing = []
global_opt_timing = []
nb_nodes = []
while changed and not max_use_abort: while changed and not max_use_abort:
t0 = time.time()
changed = False changed = False
#apply global optimizer #apply global optimizer
...@@ -1125,12 +1138,15 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1125,12 +1138,15 @@ class EquilibriumOptimizer(NavigatorOptimizer):
if env.change_tracker.changed: if env.change_tracker.changed:
changed = True changed = True
global_opt_timing.append(float(time.time() - t0))
#apply local optimizer #apply local optimizer
for node in start_from: for node in start_from:
assert node in env.outputs assert node in env.outputs
q = deque(graph.io_toposort(env.inputs, start_from)) q = deque(graph.io_toposort(env.inputs, start_from))
nb_nodes.append(len(q))
max_nb_nodes = max(max_nb_nodes, len(q)) max_nb_nodes = max(max_nb_nodes, len(q))
max_use = max_nb_nodes * self.max_use_ratio max_use = max_nb_nodes * self.max_use_ratio
def importer(node): def importer(node):
...@@ -1163,12 +1179,37 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1163,12 +1179,37 @@ class EquilibriumOptimizer(NavigatorOptimizer):
finally: finally:
self.detach_updater(env, u) self.detach_updater(env, u)
self.detach_updater(env, u) #TODO: erase this line, it's redundant at best self.detach_updater(env, u) #TODO: erase this line, it's redundant at best
loop_timing.append(float(time.time() - t0))
if max_use_abort: if max_use_abort:
_logger.error("EquilibriumOptimizer max'ed out by '%s'" % opt_name _logger.error("EquilibriumOptimizer max'ed out by '%s'" % opt_name
+ ". You can safely raise the current threshold of " + ". You can safely raise the current threshold of "
+ "%f with the theano flag 'optdb.max_use_ratio'." % + "%f with the theano flag 'optdb.max_use_ratio'." %
config.optdb.max_use_ratio) config.optdb.max_use_ratio)
if config.time_eq_optimizer:
print "EquilibriumOptimizer",
print getattr(self, "name", getattr(self, "__name__", ""))
print " time %.3fs for %d passes, %d nodes max" % (
sum(loop_timing), len(loop_timing), max_nb_nodes)
for i in range(len(loop_timing)):
print '%d - %.3fs (%.3fs in global opts) - %d nodes' % (
i, loop_timing[i], global_opt_timing[i], nb_nodes[i])
print
count_opt = []
for opt, count in process_count.iteritems():
if count > 0:
count_opt.append((count, opt))
if count_opt:
print 'times applied - optimizer:'
count_opt.sort()
for (count, opt) in count_opt[::-1]:
print ' %d - %s' % (count, opt)
print
def print_summary(self, stream=sys.stdout, level=0, depth=-1): def print_summary(self, stream=sys.stdout, level=0, depth=-1):
name = getattr(self, 'name', None) name = getattr(self, 'name', None)
print >> stream, "%s%s %s id=%i" %(' '*level, self.__class__.__name__, name, id(self)) print >> stream, "%s%s %s id=%i" %(' '*level, self.__class__.__name__, name, id(self))
......
...@@ -231,6 +231,26 @@ class T_sigmoid_opts(unittest.TestCase): ...@@ -231,6 +231,26 @@ class T_sigmoid_opts(unittest.TestCase):
ok(-exp(x) * -sigmoid(-x) * -exp(-x), ok(-exp(x) * -sigmoid(-x) * -exp(-x),
-sigmoid(-x)) -sigmoid(-x))
def test_grad_log1msigm(self):
    """Regression test for the gradient of mean(log(1 - sigmoid(x))).

    At some point this gradient evaluated to NaN: the subgraph
    (1 - sigm(x)) appeared in both the numerator and the denominator
    of a fraction, but the two identical nodes had not been merged.
    """
    x = tensor.matrix('x')
    lr = tensor.scalar('lr')
    cost = T.log(1 - sigmoid(x)).mean()
    updated_x = x - lr * theano.grad(cost, x)
    # Before the merge optimization, inf and NaN appear in the graph and
    # DebugMode would complain, so only run the check in other modes.
    mode = self.get_mode()
    if not isinstance(mode, theano.compile.DebugMode):
        f = theano.function([x, lr], updated_x)
        assert not numpy.isnan(f([[50]], 0.1))
class T_softplus_opts(unittest.TestCase): class T_softplus_opts(unittest.TestCase):
def setUp(self): def setUp(self):
......
...@@ -31,6 +31,7 @@ from theano import compile # to register the optimizer built by this file ...@@ -31,6 +31,7 @@ from theano import compile # to register the optimizer built by this file
from theano.gof.python25 import any, all from theano.gof.python25 import any, all
from theano.gof.opt import (Optimizer, pre_constant_merge, from theano.gof.opt import (Optimizer, pre_constant_merge,
pre_greedy_local_optimizer) pre_greedy_local_optimizer)
from theano.gof.opt import merge_optimizer
from theano.gof import toolbox, DestroyHandler from theano.gof import toolbox, DestroyHandler
from basic import get_constant_value, ShapeError from basic import get_constant_value, ShapeError
...@@ -301,6 +302,11 @@ def register_specialize_device(lopt, *tags, **kwargs): ...@@ -301,6 +302,11 @@ def register_specialize_device(lopt, *tags, **kwargs):
return lopt return lopt
## Register merge_optimizer as a global opt during canonicalize
# Running MergeOptimizer in the canonicalize phase collapses duplicated
# subgraphs early, so later optimizations see a single node for identical
# expressions (see test_grad_log1msigm for the motivating bug).
compile.optdb['canonicalize'].register(
'canon_merge', merge_optimizer, 'fast_run')
##################### #####################
# Dot optimizations # # Dot optimizations #
##################### #####################
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论