New user-friendly function to compare variables

900b96f4 · Olivier Delalleau · ddd1dc03 · 900b96f4 · 900b96f4 · 900b96f4
--- a/theano/gof/graph.py
+++ b/theano/gof/graph.py
@@ -8,10 +8,16 @@ To read about what theano graphs are from a user perspective, have a look at

 __docformat__ = "restructuredtext en"

+
 from copy import copy
-from theano.gof import deque

-import utils
+import theano
+from theano.gof import deque, utils
+
+# Lazy imports to avoid circular dependencies.
+is_same_graph_with_merge = None
+equal_computations = None
+

 class Apply(utils.object2):
    """
@@ -684,6 +690,111 @@ default_leaf_formatter = str
 default_node_formatter = lambda op, argstrings: "%s(%s)" % (op.op,
                                                            ", ".join(argstrings))

+
+def is_same_graph(var1, var2, givens={}, debug=False):
+    """
+    Return True iff Variables `var1` and `var2` perform the same computation.
+
+    By 'performing the same computation', we mean that they must share the same
+    graph, so that for instance this function will return False when comparing
+    (x * (y * z)) with ((x * y) * z).
+
+    The current implementation is not efficient since, when possible, it
+    computes the result with two different functions and asserts that they
+    agree. The goal is to verify that they are equivalent, to eventually get
+    rid of one of them in the future.
+
+    :param var1: The first Variable to compare.
+
+    :param var2: The second Variable to compare.
+
+    :param givens: Similar to the `givens` argument of `theano.function`:
+    substitutions to perform in the computational graphs of `var1` and `var2`,
+    given as a dictionary or anything `dict()` accepts. It is associated to
+    neither `var1` nor `var2`: a substitution may affect both graphs if the
+    substituted variable is present in both.
+
+    :param debug: If True, an AssertionError is raised when we are in a
+    situation where the `equal_computations` implementation cannot be called.
+    This parameter is intended to be used in tests only, to make sure we
+    properly test both implementations.
+
+    Examples:
+
+        ======  ======  ======  ======
+        var1    var2    givens  output
+        ======  ======  ======  ======
+        x + 1   x + 1   {}      True
+        x + 1   y + 1   {}      False
+        x + 1   y + 1   {x: y}  True
+        ======  ======  ======  ======
+    """
+    # Lazy import; `global` makes the imports bind at module level.
+    global equal_computations, is_same_graph_with_merge
+    if equal_computations is None:
+        from theano.gof.opt import is_same_graph_with_merge
+        from theano.scan_module.scan_utils import equal_computations
+    # Convert `givens` to dictionary.
+    if not isinstance(givens, dict):
+        givens = dict(givens)
+    # Get result from the merge-based function (this is the returned value).
+    rval1 = is_same_graph_with_merge(var1=var1, var2=var2, givens=givens)
+    # Get result from the function `equal_computations` from scan_utils.
+    use_equal_computations = True
+    if givens:
+        # We need to build the `in_xs` and `in_ys` lists. To do this, we need
+        # to be able to tell whether a variable belongs to the computational
+        # graph of `var1` or `var2`.
+        # The typical case we want to handle is when `to_replace` belongs to
+        # one of these graphs, and `replace_by` belongs to the other one. In
+        # other situations, the current implementation of `equal_computations`
+        # is probably not appropriate, so we do not call it.
+        ok = True
+        in_xs = []
+        in_ys = []
+        # Compute the sets of all variables found in each computational graph.
+        inputs_var = map(inputs, ([var1], [var2]))
+        all_vars = [set(variables(v_i, v_o))
+                    for v_i, v_o in ((inputs_var[0], [var1]),
+                                     (inputs_var[1], [var2]))]
+        def in_var(x, k):
+            # True iff `x` is in the graph of `var1` (k=1) or `var2` (k=2).
+            return x in all_vars[k - 1]
+        for to_replace, replace_by in givens.iteritems():
+            # Map each substitution variable to a pair of booleans telling
+            # whether it belongs to the graph of `var1` and of `var2`.
+            inside = dict((v, [in_var(v, k) for k in (1, 2)])
+                          for v in (to_replace, replace_by))
+            if (inside[to_replace][0] and not inside[to_replace][1] and
+                inside[replace_by][1] and not inside[replace_by][0]):
+                # Substitute variable in `var1` by one from `var2`.
+                in_xs.append(to_replace)
+                in_ys.append(replace_by)
+            elif (inside[to_replace][1] and not inside[to_replace][0] and
+                  inside[replace_by][0] and not inside[replace_by][1]):
+                # Substitute variable in `var2` by one from `var1`.
+                in_xs.append(replace_by)
+                in_ys.append(to_replace)
+            else:
+                ok = False  # Substitution pattern not handled.
+                break
+        if not ok:
+            # We cannot directly use `equal_computations`.
+            if debug:
+                raise AssertionError(
+                    'When `debug` is True we want to make sure we are also '
+                    'using the `equal_computations` implementation')
+            use_equal_computations = False
+    else:
+        in_xs = None
+        in_ys = None
+    if use_equal_computations:
+        rval2 = equal_computations(xs=[var1], ys=[var2],
+                                   in_xs=in_xs, in_ys=in_ys)
+        assert rval2 == rval1  # Both implementations must agree.
+    return rval1
+
+
 def op_as_string(i, op,
                 leaf_formatter = default_leaf_formatter,
                 node_formatter = default_node_formatter):

--- a/theano/gof/opt.py
+++ b/theano/gof/opt.py
@@ -249,13 +249,13 @@ class MergeOptimizer(Optimizer):
    """
    Merges parts of the graph that are identical and redundant.

-    The basic principle is that if two Applies have ops that compare equal, and identical
-    inputs, then they do not both need to be computed.  The clients of one are transfered to
-    the other and one of them is removed from the graph.  This procedure is carried out in
-    input->output order through the graph.
+    The basic principle is that if two Applies have ops that compare equal, and
+    identical inputs, then they do not both need to be computed. The clients of
+    one are transferred to the other and one of them is removed from the graph.
+    This procedure is carried out in input->output order through the graph.

-    The first step of merging is constant-merging, so that all clients of an int(1) for example,
-    are transfered to a particular instance of int(1).
+    The first step of merging is constant-merging, so that all clients of an
+    int(1) for example, are transferred to a particular instance of int(1).
    """
    def __init__(self, skip_const_merge=False):
        self.skip_const_merge = skip_const_merge
@@ -348,6 +348,41 @@ class MergeOptimizer(Optimizer):

 merge_optimizer = MergeOptimizer()

+
+def is_same_graph_with_merge(var1, var2, givens={}):
+    """
+    Merge-based implementation of `theano.gof.graph.is_same_graph`.
+
+    See help on `theano.gof.graph.is_same_graph` for additional documentation.
+    """
+    # Copy variables (MergeOptimizer modifies them); TODO confirm `import copy`.
+    copied = copy.deepcopy([var1, var2, givens])
+    vars = copied[0:2]  # NOTE(review): shadows the builtin `vars`.
+    givens = copied[2]
+    # Create Env from the inputs and outputs of the copied graphs.
+    inputs = theano.gof.graph.inputs(vars)
+    env = theano.gof.env.Env(inputs, vars)
+    # Perform Variable substitution.
+    for to_replace, replace_by in givens.iteritems():
+        env.replace(to_replace, replace_by)
+    # Perform merge optimization.
+    merge_optimizer.optimize(env)
+    # When two variables perform the same computations, they will have the same
+    # owner in the optimized graph.
+    # We need to be careful with the special case where the owner is None,
+    # which happens when the graph is made of a single Variable.
+    # We also need to make sure we replace a Variable if it is present in
+    # `givens`.
+    vars_replaced = [givens.get(v, v) for v in vars]
+    o1, o2 = [v.owner for v in vars_replaced]
+    if o1 is None and o2 is None:
+        # Comparing two single-Variable graphs: they are equal if they are
+        # the same Variable.
+        return vars_replaced[0] == vars_replaced[1]
+    else:
+        return o1 is o2
+
+
 def MergeOptMerge(opt):
    """WRITEME
    Returns an Optimizer that merges the graph then applies the

--- a/theano/gof/tests/test_graph.py
+++ b/theano/gof/tests/test_graph.py
-
+import unittest
 from collections import deque
-from theano.gof.graph import *

+from theano import tensor
+from theano.gof.graph import (
+        Apply, as_string, clone, general_toposort, inputs, io_toposort,
+        is_same_graph, Variable)
 from theano.gof.op import Op
 from theano.gof.type import Type
-from theano.gof.graph import Variable


 def as_variable(x):
@@ -216,4 +218,76 @@ class TestToposort:
        assert all == [o0]


-
+#################
+# is_same_graph #
+#################
+
+class TestIsSameGraph(unittest.TestCase):
+
+    def check(self, expected, debug=True):
+        """
+        Core function to perform comparison.
+
+        :param expected: A list of tuples (v1, v2, ((g1, o1), ..., (gN, oN)))
+        with:
+            - `v1` and `v2` two Variables (the graphs to be compared)
+            - `gj` a `givens` dictionary to give as input to `is_same_graph`
+            - `oj` the expected output of `is_same_graph(v1, v2, givens=gj)`
+
+        :param debug: Forwarded to `is_same_graph`; if True, we make sure we
+        are testing both of its implementations.
+
+        This function also tries to call `is_same_graph` by inverting `v1` and
+        `v2`, and ensures the output remains the same.
+        """
+        for v1, v2, go in expected:
+            for gj, oj in go:
+                r1 = is_same_graph(v1, v2, givens=gj, debug=debug)
+                assert r1 == oj
+                r2 = is_same_graph(v2, v1, givens=gj, debug=debug)
+                assert r2 == oj
+
+    def test_single_var(self):
+        """
+        Test `is_same_graph` with some trivial graphs (one Variable).
+        """
+        x, y, z = tensor.vectors('x', 'y', 'z')
+        self.check([
+            (x, x, (({}, True), )),
+            (x, y, (({}, False), ({y: x}, True), )),
+            (x, tensor.neg(x), (({}, False), )),
+            (x, tensor.neg(y), (({}, False), )),
+            ])
+
+    def test_full_graph(self):
+        """
+        Test `is_same_graph` with more complex graphs.
+        """
+        x, y, z = tensor.vectors('x', 'y', 'z')
+        t = x * y
+        self.check([
+            (x * 2, x * 2, (({}, True), )),
+            (x * 2, y * 2, (({}, False), ({y: x}, True), )),
+            (x * 2, y * 2, (({}, False), ({x: y}, True), )),
+            (x * 2, y * 3, (({}, False), ({y: x}, False), )),
+            (t * 2, z * 2, (({}, False), ({t: z}, True), )),
+            (t * 2, z * 2, (({}, False), ({z: t}, True), )),
+            (x * (y * z), (x * y) * z, (({}, False), )),
+            ])
+
+    def test_merge_only(self):
+        """
+        Test `is_same_graph` when `equal_computations` cannot be used.
+
+        `debug=False` is passed so that `is_same_graph` does not raise here.
+        """
+        x, y, z = tensor.vectors('x', 'y', 'z')
+        t = x * y
+        self.check([
+            (x, t, (({}, False), ({t: x}, True))),
+            (t * 2, x * 2, (({}, False), ({t: x}, True), )),
+            (x * x, x * y, (({}, False), ({y: x}, True), )),
+            (x * x, x * y, (({}, False), ({y: x}, True), )),  # duplicate case
+            (x * x + z, x * y + t, (({}, False),
+                                    ({y: x}, False),
+                                    ({y: x, t: z}, True))),
+            ],
+            debug=False)