提交 0845ddc3 authored 作者: lamblin's avatar lamblin

Merge pull request #1407 from nouiz/fix_test_p33

Fix test p33
......@@ -286,6 +286,41 @@ can be achieved as follows:
# Inputs : [array(0.0)]
# Outputs: [array(nan)]
To help understand what is happening in your graph, you can
disable the ``local_elemwise_fusion`` and all ``inplace``
optimizations. The first is a speed optimization that merges elemwise
operations together. This makes it harder to know which particular
elemwise caused the problem. The second class of optimizations makes some
ops overwrite their inputs with their outputs. So, if an op creates a bad
output, you won't be able to see the input that was overwritten in the
``post_func`` function. To disable those optimizations (with a Theano
version after 0.6rc3), define the MonitorMode like this:
.. code-block:: python
mode = theano.compile.MonitorMode(post_func=detect_nan).excluding(
'local_elemwise_fusion', 'inplace')
f = theano.function([x], [theano.tensor.log(x) * x],
mode=mode)
.. note::
The Theano flags ``optimizer_including``, ``optimizer_excluding``
and ``optimizer_requiring`` aren't used by the MonitorMode, they
are used only by the ``default`` mode. You can't use the ``default``
mode with MonitorMode, as you need to define what you monitor.
To be sure all inputs of the node are available during the call to
``post_func``, you also must disable the garbage collector. Otherwise,
the execution of the node can garbage collect its inputs that aren't
needed anymore by the Theano function. This can be done with the Theano
flag:
.. code-block:: cfg
allow_gc=False
.. TODO: documentation for link.WrapLinkerMany
......
......@@ -20,7 +20,8 @@ class MonitorMode(Mode):
For an example of such a use case, see doc/tutorial/debug_faq.txt.
"""
def __init__(self, pre_func=None, post_func=None, optimizer='fast_run'):
def __init__(self, pre_func=None, post_func=None,
optimizer='default', linker=None):
"""
Constructor.
......@@ -35,11 +36,21 @@ class MonitorMode(Mode):
:param optimizer: The optimizer to use. One may use for instance
'fast_compile' to skip optimizations.
:param linker: DO NOT USE. This mode use its own linker.
The parameter is needed to allow selecting optimizers to use.
"""
self.pre_func = pre_func
self.post_func = post_func
wrap_linker = theano.gof.WrapLinkerMany([theano.gof.OpWiseCLinker()],
[self.eval])
if optimizer is 'default':
optimizer = theano.config.optimizer
if (linker is not None and
not isinstance(linker.mode, MonitorMode)):
raise Exception("MonitorMode can only use its own linker! You "
"should not provide one.", linker)
super(MonitorMode, self).__init__(wrap_linker, optimizer=optimizer)
def eval(self, i, node, fn):
......@@ -51,3 +62,21 @@ class MonitorMode(Mode):
fn()
if self.post_func is not None:
self.post_func(i, node, fn)
def including(self, *tags):
    """Return a copy of this mode with `tags` included, keeping the
    pre/post monitoring callbacks."""
    clone = super(MonitorMode, self).including(*tags)
    # Mode.including() builds a plain copy; re-attach the monitors.
    clone.pre_func = self.pre_func
    clone.post_func = self.post_func
    return clone

def excluding(self, *tags):
    """Return a copy of this mode with `tags` excluded, keeping the
    pre/post monitoring callbacks."""
    clone = super(MonitorMode, self).excluding(*tags)
    clone.pre_func = self.pre_func
    clone.post_func = self.post_func
    return clone

def requiring(self, *tags):
    """Return a copy of this mode requiring `tags`, keeping the
    pre/post monitoring callbacks."""
    clone = super(MonitorMode, self).requiring(*tags)
    clone.pre_func = self.pre_func
    clone.post_func = self.post_func
    return clone
......@@ -439,6 +439,11 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
and not isinstance(no_default_updates, list):
raise TypeError("no_default_update should be either a boolean or a list")
if len(updates) > 0 and any(isinstance(v, Variable)
for v in iter_over_pairs(updates)):
raise ValueError(
"The updates parameter must an OrderedDict/dict or a list of list/tuple with 2 elements")
# transform params into theano.compile.In objects.
inputs = [_pfunc_param_to_in(p, allow_downcast=allow_input_downcast)
for p in params]
......
......@@ -25,3 +25,67 @@ def test_detect_nan():
post_func=detect_nan))
f(0) # log(0) * 0 = -inf * 0 = NaN
assert nan_detected[0]
def test_optimizer():
"""
Test that we can remove optimizer
"""
nan_detected = [False]
def detect_nan(i, node, fn):
for output in fn.outputs:
if numpy.isnan(output[0]).any():
print '*** NaN detected ***'
theano.printing.debugprint(node)
print 'Inputs : %s' % [input[0] for input in fn.inputs]
print 'Outputs: %s' % [output[0] for output in fn.outputs]
nan_detected[0] = True
break
x = theano.tensor.dscalar('x')
mode = theano.compile.MonitorMode(post_func=detect_nan)
mode = mode.excluding('fusion')
f = theano.function([x], [theano.tensor.log(x) * x],
mode=mode)
# Test that the fusion wasn't done
assert len(f.maker.fgraph.nodes) == 2
f(0) # log(0) * 0 = -inf * 0 = NaN
# Test that we still detect the nan
assert nan_detected[0]
def test_not_inplace():
"""
Test that we can remove optimizers including inplace optimizers
"""
nan_detected = [False]
def detect_nan(i, node, fn):
for output in fn.outputs:
if numpy.isnan(output[0]).any():
print '*** NaN detected ***'
theano.printing.debugprint(node)
print 'Inputs : %s' % [input[0] for input in fn.inputs]
print 'Outputs: %s' % [output[0] for output in fn.outputs]
nan_detected[0] = True
break
x = theano.tensor.vector('x')
mode = theano.compile.MonitorMode(post_func=detect_nan)
#mode = mode.excluding('fusion', 'inplace')
mode = mode.excluding('local_elemwise_fusion',
'inplace_elemwise_optimizer')
o = theano.tensor.outer(x, x)
out = theano.tensor.log(o) * o
f = theano.function([x], [out],
mode=mode)
# Test that the fusion wasn't done
assert len(f.maker.fgraph.nodes) == 5
assert not f.maker.fgraph.toposort()[-1].op.destroy_map
f([0, 0]) # log(0) * 0 = -inf * 0 = NaN
# Test that we still detect the nan
assert nan_detected[0]
......@@ -16,6 +16,12 @@ import itertools
import distutils.sysconfig
importlib = None
try:
import importlib
except ImportError:
pass
import numpy.distutils # TODO: TensorType should handle this
import theano
......@@ -278,6 +284,9 @@ def dlimport(fullpath, suffix=None):
sys.path[0:0] = [workdir] # insert workdir at beginning (temporarily)
try:
if importlib is not None:
if hasattr(importlib, "invalidate_caches"):
importlib.invalidate_caches()
rval = __import__(module_name, {}, {}, [module_name])
if not rval:
raise Exception('__import__ failed', fullpath)
......
差异被折叠。
......@@ -4,7 +4,7 @@ from theano.compat import cmp
## {{{ http://code.activestate.com/recipes/578231/ (r1)
# Copyright (c) Oren Tirosh 2012
#
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
......@@ -22,6 +22,8 @@ from theano.compat import cmp
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
def memodict(f):
""" Memoization decorator for a function taking a single argument """
class memodict(defaultdict):
......@@ -37,10 +39,11 @@ def make_depends():
def depends((a, b)):
""" Returns True if a depends on b """
return (any(bout in a.inputs for bout in b.outputs)
or any(depends((ainp.owner, b)) for ainp in a.inputs
if ainp.owner))
or any(depends((ainp.owner, b)) for ainp in a.inputs
if ainp.owner))
return depends
def make_dependence_cmp():
""" Create a comparator to represent the dependence of nodes in a graph """
......@@ -53,18 +56,27 @@ def make_dependence_cmp():
Returns negative number if b depends on a
Returns 0 otherwise
"""
if depends((a, b)): return 1
if depends((b, a)): return -1
if depends((a, b)):
return 1
if depends((b, a)):
return -1
return 0
return dependence
def reverse_dict(d):
""" Reverses direction of dependence dict
"""Reverses direction of dependence dict
>>> d = {'a': (1, 2), 'b': (2, 3), 'c':()}
>>> reverse_dict(d)
{1: ('a',), 2: ('a', 'b'), 3: ('b',)}
:note: dict order are not deterministic. As we iterate on the
input dict, it make the output of this function depend on the
dict order. So this function output order should be considered
as undeterministic.
"""
result = {}
for key in d:
......@@ -72,6 +84,7 @@ def reverse_dict(d):
result[val] = result.get(val, tuple()) + (key, )
return result
def _toposort(edges):
""" Topological sort algorithm by Kahn [1] - O(nodes + vertices)
......@@ -106,6 +119,7 @@ def _toposort(edges):
raise ValueError("Input has cycles")
return L
def posort(l, *cmps):
""" Partially ordered sort with multiple comparators
......@@ -127,9 +141,9 @@ def posort(l, *cmps):
implemented with _toposort """
comes_before = dict((a, set()) for a in l)
comes_after = dict((a, set()) for a in l)
comes_after = dict((a, set()) for a in l)
def add_links(a, b): # b depends on a
def add_links(a, b): # b depends on a
comes_after[a].add(b)
comes_after[a].update(comes_after[b])
for c in comes_before[a]:
......@@ -148,7 +162,7 @@ def posort(l, *cmps):
for cmp in cmps:
for a in l:
for b in l:
if cmp(a, b) < 0: # a wants to come before b
if cmp(a, b) < 0: # a wants to come before b
# if this wouldn't cause a cycle and isn't already known
if not b in comes_before[a] and not b in comes_after[a]:
add_links(a, b)
......@@ -156,6 +170,7 @@ def posort(l, *cmps):
return _toposort(comes_after)
def sort_apply_nodes(inputs, outputs, cmps):
""" Order a graph of apply nodes according to a list of comparators
......@@ -178,6 +193,7 @@ def sort_apply_nodes(inputs, outputs, cmps):
return posort(list_of_nodes(inputs, outputs), *cmps)
def sort_schedule_fn(*cmps):
""" Make a schedule function from comparators
......@@ -186,11 +202,13 @@ def sort_schedule_fn(*cmps):
"""
dependence = make_dependence_cmp()
cmps = (dependence,) + cmps
def schedule(fgraph):
""" Order nodes in a FunctionGraph """
return sort_apply_nodes(fgraph.inputs, fgraph.outputs, cmps)
return schedule
def key_to_cmp(key):
def key_cmp(a, b):
return cmp(key(a), key(b))
......
from theano.gof.sched import (make_dependence_cmp, sort_apply_nodes,
reverse_dict, _toposort, posort)
reverse_dict, _toposort, posort)
import theano
from theano import tensor
......@@ -7,6 +7,7 @@ from theano.gof.graph import io_toposort
from theano.gof.python25 import any
from theano.compat import cmp
def test_dependence():
dependence = make_dependence_cmp()
......@@ -30,7 +31,10 @@ def test_sort_apply_nodes():
def test_reverse_dict():
d = {'a': (1, 2), 'b': (2, 3), 'c': ()}
assert reverse_dict(d) == {1: ('a',), 2: ('a', 'b'), 3: ('b',)}
# Python 3.3 enable by default random hash for dict.
# This change the order of traversal, so this can give 2 outputs
assert (reverse_dict(d) == {1: ('a',), 2: ('a', 'b'), 3: ('b',)} or
reverse_dict(d) == {1: ('a',), 2: ('b', 'a'), 3: ('b',)})
def test__toposort():
......@@ -44,7 +48,7 @@ def test__toposort():
def test_posort_easy():
nodes = "asdfghjkl"
def cmp(a, b):
def mycmp(a, b):
if a < b:
return -1
elif a > b:
......@@ -52,7 +56,7 @@ def test_posort_easy():
else:
return 0
assert posort(nodes, cmp) == list("adfghjkls")
assert posort(nodes, mycmp) == list("adfghjkls")
def test_posort():
......
......@@ -360,7 +360,7 @@ class T_softplus_opts(unittest.TestCase):
f(numpy.random.rand(54).astype(config.floatX))
def test_log1msigm_to_softplus(self):
x = T.vector()
x = T.matrix()
out = T.log(1 - sigmoid(x))
f = theano.function([x], out, mode=self.m)
......@@ -369,7 +369,29 @@ class T_softplus_opts(unittest.TestCase):
assert isinstance(topo[0].op.scalar_op,
theano.tensor.nnet.sigm.ScalarSoftplus)
assert isinstance(topo[1].op.scalar_op, theano.scalar.Neg)
f(numpy.random.rand(54).astype(config.floatX))
f(numpy.random.rand(54, 11).astype(config.floatX))
# Same test with a flatten
out = T.log(1 - T.flatten(sigmoid(x)))
f = theano.function([x], out, mode=self.m)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert isinstance(topo[0].op, T.Flatten)
assert isinstance(topo[1].op.scalar_op,
theano.tensor.nnet.sigm.ScalarSoftplus)
assert isinstance(topo[2].op.scalar_op, theano.scalar.Neg)
f(numpy.random.rand(54, 11).astype(config.floatX))
# Same test with a reshape
out = T.log(1 - sigmoid(x).reshape([x.size]))
f = theano.function([x], out, mode=self.m)
topo = f.maker.fgraph.toposort()
#assert len(topo) == 3
assert any(isinstance(node.op, T.Reshape) for node in topo)
assert any(isinstance(getattr(node.op, 'scalar_op', None),
theano.tensor.nnet.sigm.ScalarSoftplus)
for node in topo)
f(numpy.random.rand(54, 11).astype(config.floatX))
def test_log1pexp_to_softplus(self):
m = theano.config.mode
......
......@@ -273,8 +273,8 @@ def inplace_elemwise_optimizer_op(OP):
return inplace_elemwise_optimizer
inplace_elemwise_optimizer = inplace_elemwise_optimizer_op(T.Elemwise)
compile.optdb.register('inplace_opt', inplace_elemwise_optimizer, 75,
'inplace_elemwise_optimizer',
'fast_run', 'inplace')
......@@ -2385,6 +2385,27 @@ def local_div_switch_sink(node):
return False
################
# Flatten Opts #
################
@register_canonicalize
@register_stabilize
@gof.local_optimizer([])
def local_flatten_lift(node):
    """
    Flatten(UnaryElemwise(x)) -> UnaryElemwise(Flatten(x))

    This optimization is needed by optimization
    nnet/sigm.py:log1msigm_to_softplus to get applied when there is a flatten.
    """
    if not isinstance(node.op, T.Flatten):
        return
    prev = node.inputs[0].owner
    # Only lift through a unary Elemwise (one input), so the rewrite
    # is a pure reordering of the two ops.
    if prev is None or not isinstance(prev.op, T.Elemwise):
        return
    if len(prev.inputs) != 1:
        return
    flattened = node.op(prev.inputs[0])
    return [prev.op(flattened)]
##################
# Reshape opts #
##################
......@@ -2415,6 +2436,26 @@ def local_reshape_chain(node):
return False
register_canonicalize(local_reshape_chain)
@register_canonicalize
@register_stabilize
@gof.local_optimizer([])
def local_reshape_lift(node):
    """
    Reshape(UnaryElemwise(x)) -> UnaryElemwise(Reshape(x))

    This optimization is needed by optimization
    nnet/sigm.py:log1msigm_to_softplus to get applied when there is a reshape.
    """
    if not isinstance(node.op, T.Reshape):
        return
    prev = node.inputs[0].owner
    # Only lift through a unary Elemwise (one input), so the rewrite
    # is a pure reordering of the two ops.
    if prev is None or not isinstance(prev.op, T.Elemwise):
        return
    if len(prev.inputs) != 1:
        return
    # node.inputs[1] is the target shape of the original Reshape.
    reshaped = node.op(prev.inputs[0], node.inputs[1])
    return [prev.op(reshaped)]
if 0:
# TODO: Test that this optimziation works.
@register_canonicalize
......
......@@ -3988,6 +3988,35 @@ def test_local_div_to_inv():
assert numpy.allclose(out_val, 0.5)
def test_local_flatten_lift():
    """Check that Flatten(exp(x)) is rewritten to exp(Flatten(x))."""
    for outdim in range(1, 4):
        flatten_op = tensor.Flatten(outdim)
        x = tensor.tensor4()
        out = flatten_op(T.exp(x))
        assert out.ndim == outdim
        mode = compile.mode.get_default_mode().including('local_flatten_lift')
        f = theano.function([x], out, mode=mode)
        f(numpy.random.rand(5, 4, 3, 2).astype(config.floatX))
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 2
        # After lifting, Flatten runs first and the Elemwise second.
        assert isinstance(topo[0].op, tensor.Flatten)
        assert isinstance(topo[1].op, tensor.Elemwise)
def test_local_reshape_lift():
    """Check that exp(x).reshape(...) is rewritten to exp(reshape(x))."""
    x = tensor.tensor4()
    out = T.exp(x).reshape([x.size])
    assert out.ndim == 1
    mode = compile.mode.get_default_mode().including('local_reshape_lift')
    f = theano.function([x], out, mode=mode)
    f(numpy.random.rand(5, 4, 3, 2).astype(config.floatX))
    topo = f.maker.fgraph.toposort()
    # After lifting, the Reshape precedes the final Elemwise.
    assert isinstance(topo[-2].op, tensor.Reshape)
    assert isinstance(topo[-1].op, tensor.Elemwise)
class Test_lift_transpose_through_dot(unittest.TestCase):
def simple_optimize(self, g):
out2in(opt.local_useless_elemwise).optimize(g)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论