提交 defa003a authored 作者: abergeron's avatar abergeron

Merge pull request #1683 from nouiz/opfromgraph

OpFromGraph documentation and cleanup
.. _opfromgraph:
===========
OpFromGraph
===========
This page describes :class:`theano.OpFromGraph
<theano.compile.builders.OpFromGraph>`, an Op that allows one to
encapsulate a Theano graph in an Op.
This can be used to encapsulate some functionality in one block. It is
useful for scaling Theano compilation of large, regular graphs in which
the encapsulated functionality is reused with different inputs many
times. Thanks to this encapsulation, it can make the Theano compilation
phase faster for graphs with many nodes.
Using this for small graphs is not recommended, as it disables
optimizations between what is inside the encapsulation and what is outside it.
.. note:
This hasn't been used widely so far. If you have any
questions/comments, do not hesitate to contact us on the mailing list.
.. autoclass:: theano.compile.builders.OpFromGraph
......@@ -9,8 +9,6 @@ from theano.compile.mode import *
from theano.compile.io import *
from theano.compile.builders import *
from theano.compile.module import *
from theano.compile.debugmode import DebugMode
......@@ -25,4 +23,6 @@ from theano.compile.sharedvalue import (shared, shared_constructor,
SharedVariable)
from theano.compile.pfunc import pfunc, Param, rebuild_collect_shared
from theano.compile.builders import *
from theano.compile.function import function
from theano import gof
from theano import gradient as G
from theano.compile.function_module import orig_function
from theano.compile import SharedVariable, rebuild_collect_shared
from theano.gof import ops_with_inner_function
class OpFromGraph(gof.Op):
"""
This create an L{Op} from a list of input variables and a list of output
variables.
The signature is the same as the signature of L{FunctionFactory}
and/or function and the resulting L{Op}'s perform will do the same
operation as::
function(inputs, outputs, **kwargs)
Take note that the following options, if provided, must take the
value(s) listed below:
unpack_single = False
borrow_outputs = False
OpFromGraph takes an additional input, grad_depth. If grad_depth
is n, OpFromGraph will make special Ops for gradients up to the
nth level, allowing the user to differentiate this op up to n
times. The parameter defaults to 1. If grad_depth == 0, the op
will not be differentiable.
Example:
x, y, z = tensor.scalars('xyz')
e = x + y * z
op = OpFromGraph([x, y, z], [e], linker='c')
# op behaves like a normal theano op
e2 = op(x, y, z) + op(z, y, x)
fn = function([x, y, z], [e2])
"""This create an `Op` from inputs and outputs list of variables.
The signature is similar to theano.function() and the resulting
`Op` perform will do the same operation as::
orig_function(inputs, outputs, **kwargs)
TODO:
- examples for a multi-layer mlp. where?
- __hash__, __eq__ otherwise won't merge, try gof.opt.is_same_graph_with_merge(op1.new_outputs, op2, new_outputs)
- c_code() to remove the double overhead?
- opt to unfold it, work inplace on inputs
- grad() make it support DisconnectedType and the new interface
- check how it work with updates.
- add test with constant as input or inside the inner graph.
- Add support for the GPU? Probably just need an opt to remove transfer
- Add support to pickle this Op.
- Add support/test with random generator
:note:
- We support shared variable in the inner graph. This is automatic and
invisible to the user. They can be as input to the node or in the
inner graph.
- We support unused inputs. This is needed for the grad.
Example 1:
.. code-block:: python
from theano import function, OpFromGraph, tensor
x, y, z = tensor.scalars('xyz')
e = x + y * z
op = OpFromGraph([x, y, z], [e])
# op behaves like a normal theano op
e2 = op(x, y, z) + op(z, y, x)
fn = function([x, y, z], [e2])
Example 2 with shared variable:
.. code-block:: python
import numpy
import theano
from theano import config, function, OpFromGraph, tensor
x, y, z = tensor.scalars('xyz')
s = theano.shared(numpy.random.rand(2, 2).astype(config.floatX))
e = x + y * z + s
op = OpFromGraph([x, y, z], [e])
# op behaves like a normal theano op
e2 = op(x, y, z) + op(z, y, x)
fn = function([x, y, z], [e2])
"""
def __init__(self, inputs, outputs, grad_depth=1, **kwargs):
def __init__(self, inputs, outputs, **kwargs):
if not isinstance(outputs, list):
raise TypeError('outputs must be list', outputs)
for i in inputs + outputs:
......@@ -44,34 +71,33 @@ class OpFromGraph(gof.Op):
if 'updates' in kwargs:
raise TypeError('updates are not allowed in kwargs')
# TODO: the graph may have implicit inputs like
# SharedVariable instances.
# what impact to they have on the validity of this Op?
self.fn = orig_function(inputs, outputs, **kwargs)
# To support correctly shared variables the inner fct should
# not see them. Otherwise their is problem with the gradient.
self.shared_inputs = [var for var in gof.graph.inputs(outputs)
if isinstance(var, SharedVariable)]
used_inputs = [var for var in gof.graph.inputs(outputs)
if not isinstance(var, gof.Constant)]
shared_vars = [var.type() for var in self.shared_inputs]
new = rebuild_collect_shared(outputs, inputs=inputs + shared_vars,
replace=dict(zip(self.shared_inputs,
shared_vars)),
copy_inputs_over=False)
(new_inputs, new_outputs,
[clone_d, update_d, update_expr, shared_inputs]) = new
assert len(new_inputs) == len(inputs) + len(self.shared_inputs)
assert len(new_outputs) == len(outputs)
assert not update_d
assert not update_expr
assert not shared_inputs
self.new_inputs = new_inputs
self.new_outputs = new_outputs
self.inputs = inputs
self.outputs = outputs
self.kwargs = kwargs
self.input_types = [input.type for input in inputs]
self.output_types = [output.type for output in outputs]
if grad_depth > 0:
output_grads = [t() for t in self.output_types]
# OpFromGraph doesn't implement a connection_pattern, so for now we regard
# all inputs and outputs as connected. This will compute the right numerical
# value for the gradients but could fail to raise the disconnected inputs error
# in some cases.
gs = G.grad(cost=None, known_grads=dict(zip(self.outputs, output_grads)),
wrt=self.inputs, disconnected_inputs='ignore')
self.grad_ops = []
for g in gs:
if g is None:
self.grad_ops.append(lambda *args: None)
else:
# It is normal if some inputs are not needed in order
# to compute the gradient, so we ignore them.
self.grad_ops.append(OpFromGraph(inputs + output_grads,
[g],
grad_depth=grad_depth - 1,
on_unused_input='ignore'))
def __eq__(self, other):
#TODO: recognize a copy
......@@ -87,9 +113,18 @@ class OpFromGraph(gof.Op):
raise TypeError("Wrong type, expected %s but got %s"
% (type, input.type))
return gof.Apply(self,
inputs,
list(inputs) + self.shared_inputs,
[type() for type in self.output_types])
def make_thunk(self, node, storage_map, compute_map, no_recycling):
    """Return the default thunk for this node, compiling the inner
    function on demand.

    The inner compiled function ``self.fn`` is built from
    ``self.new_inputs``/``self.new_outputs`` the first time a thunk is
    requested — presumably to defer the (expensive) inner compilation
    until the Op is actually used; TODO confirm against __init__.
    """
    ret = super(OpFromGraph, self).make_thunk(node, storage_map,
                                              compute_map, no_recycling)
    # Lazily compile the inner graph; `fn` may not exist yet.
    if not hasattr(self, "fn"):
        self.fn = orig_function(self.new_inputs,
                                self.new_outputs,
                                **self.kwargs)
    return ret
def perform(self, node, inputs, outputs):
variables = self.fn(*inputs)
assert len(variables) == len(outputs)
......@@ -99,10 +134,32 @@ class OpFromGraph(gof.Op):
output[0] = variable.copy()
def grad(self, inputs, output_grads):
if hasattr(self, 'grad_ops'):
return [go(*(inputs + output_grads)) for go in self.grad_ops]
# OpFromGraph doesn't implement a connection_pattern, so for
# now we regard all inputs and outputs as connected. This will
# compute the right numerical value for the gradients but
# could fail to raise the disconnected inputs error in some
# cases.
if hasattr(self, "grad_ops"):
grad_ops = self.grad_ops
else:
raise NotImplementedError
gs = G.grad(cost=None,
known_grads=dict(zip(self.new_outputs, output_grads)),
wrt=self.new_inputs,
disconnected_inputs='ignore')
grad_ops = []
for g in gs:
if g is None:
grad_ops.append(lambda *args: None)
else:
# It is normal if some inputs are not needed in order
# to compute the gradient, so we ignore them.
grad_ops.append(OpFromGraph(self.new_inputs + output_grads,
[g],
on_unused_input='ignore'))
self.grad_ops = grad_ops
return [go(*(inputs + output_grads)) for go in grad_ops]
# Since OpFromGraph contains a Theano compiled function, we should let
# DebugMode know about it
......
......@@ -1036,7 +1036,7 @@ class FunctionMaker(object):
# initialize the linker
if not hasattr(linker, 'accept'):
raise ValueError("'linker' parameter of FunctionFactory should be a Linker with an accept method " \
raise ValueError("'linker' parameter of FunctionMaker should be a Linker with an accept method " \
"or one of %s" % theano.compile.mode.predefined_linkers.keys())
#the 'no_borrow' outputs are the ones for which that we can't return the internal storage pointer.
......
import numpy
import unittest
from theano import config
from theano import config, shared
from theano.compile import function
......@@ -17,7 +17,9 @@ class T_OpFromGraph(unittest.TestCase):
x, y, z = T.matrices('xyz')
e = x + y * z
op = OpFromGraph([x, y, z], [e], mode='FAST_RUN')
f = op(x, y, z) - op(y, z, x) # (1+3*5=array of 16) - (3+1*5=array of 8)
# (1+3*5=array of 16) - (3+1*5=array of 8)
f = op(x, y, z) - op(y, z, x)
fn = function([x, y, z], f)
xv = numpy.ones((2, 2), dtype=config.floatX)
yv = numpy.ones((2, 2), dtype=config.floatX)*3
......@@ -47,7 +49,7 @@ class T_OpFromGraph(unittest.TestCase):
def test_grad(self):
x, y, z = T.matrices('xyz')
e = x + y * z
op = OpFromGraph([x, y, z], [e], mode='FAST_RUN', grad_depth=2)
op = OpFromGraph([x, y, z], [e], mode='FAST_RUN')
f = op(x, y, z)
f = f - T.grad(T.sum(f), y)
fn = function([x, y, z], f)
......@@ -56,6 +58,56 @@ class T_OpFromGraph(unittest.TestCase):
zv = numpy.ones((2, 2), dtype=config.floatX)*5
assert numpy.all(11.0 == fn(xv, yv, zv))
def test_grad_grad(self):
    """Differentiating through an OpFromGraph twice gives the right value."""
    a, b, c = T.matrices('xyz')
    op = OpFromGraph([a, b, c], [a + b * c], mode='FAST_RUN')
    expr = op(a, b, c)
    # Subtract d(sum(expr))/db twice: second-order use of the op's grad.
    for _ in range(2):
        expr = expr - T.grad(T.sum(expr), b)
    fn = function([a, b, c], expr)
    ones = numpy.ones((2, 2), dtype=config.floatX)
    # e - c - 0 = (1 + 3*5) - 5 - 5 = 6 elementwise.
    assert numpy.allclose(6.0, fn(ones, ones * 3, ones * 5))
def test_shared(self):
    """An OpFromGraph whose inner graph captures a shared variable.

    The shared variable is an implicit input of the op.  Evaluating the
    compiled function twice checks that the shared state is not
    corrupted between calls.
    """
    x, y, z = T.matrices('xyz')
    s = shared(numpy.random.rand(2, 2).astype(config.floatX))
    e = x + y * z + s
    op = OpFromGraph([x, y, z], [e], mode='FAST_RUN')
    # (1+3*5+s = array of 16+s) - (3+1*5+s = array of 8+s); s cancels.
    f = op(x, y, z) - op(y, z, x)
    fn = function([x, y, z], f)
    xv = numpy.ones((2, 2), dtype=config.floatX)
    yv = numpy.ones((2, 2), dtype=config.floatX) * 3
    zv = numpy.ones((2, 2), dtype=config.floatX) * 5
    assert numpy.allclose(8.0, fn(xv, yv, zv))
    # Second call must give the same result.
    assert numpy.allclose(8.0, fn(xv, yv, zv))
def test_shared_grad(self):
    """Gradients flow through an OpFromGraph that captures a shared var."""
    x, y, z = T.matrices('xyz')
    s = shared(numpy.random.rand(2, 2).astype(config.floatX))
    op = OpFromGraph([x, y, z], [x + y * z + s], mode='FAST_RUN')

    ones = numpy.ones((2, 2), dtype=config.floatX)
    xv, yv, zv = ones, ones * 3, ones * 5

    # Gradient with respect to an explicit input.
    out = op(x, y, z)
    compiled = function([x, y, z], out - T.grad(T.sum(out), y))
    assert numpy.allclose(11.0 + s.get_value(), compiled(xv, yv, zv))

    # Gradient with respect to the captured shared variable itself.
    out = op(x, y, z)
    compiled = function([x, y, z], out - T.grad(T.sum(out), s))
    assert numpy.allclose(15.0 + s.get_value(),
                          compiled(xv, yv, zv))
if __name__ == '__main__':
    # Allow running this test module directly from the command line.
    unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论