提交 defa003a authored 作者: abergeron's avatar abergeron

Merge pull request #1683 from nouiz/opfromgraph

OpFromGraph documentation and cleanup
.. _opfromgraph:
===========
OpFromGraph
===========
This page describes :class:`theano.OpFromGraph
<theano.compile.builders.OpFromGraph>`, an Op that allows one to
encapsulate a Theano graph in an Op.
This can be used to encapsulate some functionality in one block. It is
useful for scaling Theano compilation of bigger, regular graphs when we
reuse the encapsulated functionality with different inputs many
times. Thanks to this encapsulation, it can make the Theano compilation
phase faster for graphs with many nodes.
Using this for small graphs isn't recommended as it disables
optimizations between what is inside the encapsulation and what is
outside of it.
.. note::

    This wasn't used widely up to now. If you have any
    questions/comments, don't hesitate to contact us on the mailing list.
.. autoclass:: theano.compile.builders.OpFromGraph
...@@ -9,8 +9,6 @@ from theano.compile.mode import * ...@@ -9,8 +9,6 @@ from theano.compile.mode import *
from theano.compile.io import * from theano.compile.io import *
from theano.compile.builders import *
from theano.compile.module import * from theano.compile.module import *
from theano.compile.debugmode import DebugMode from theano.compile.debugmode import DebugMode
...@@ -25,4 +23,6 @@ from theano.compile.sharedvalue import (shared, shared_constructor, ...@@ -25,4 +23,6 @@ from theano.compile.sharedvalue import (shared, shared_constructor,
SharedVariable) SharedVariable)
from theano.compile.pfunc import pfunc, Param, rebuild_collect_shared from theano.compile.pfunc import pfunc, Param, rebuild_collect_shared
from theano.compile.builders import *
from theano.compile.function import function from theano.compile.function import function
from theano import gof from theano import gof
from theano import gradient as G from theano import gradient as G
from theano.compile.function_module import orig_function from theano.compile.function_module import orig_function
from theano.compile import SharedVariable, rebuild_collect_shared
from theano.gof import ops_with_inner_function from theano.gof import ops_with_inner_function
class OpFromGraph(gof.Op): class OpFromGraph(gof.Op):
""" """This create an `Op` from inputs and outputs list of variables.
This create an L{Op} from a list of input variables and a list of output
variables. The signature is similar to theano.function() and the resulting
`Op` perform will do the same operation as::
The signature is the same as the signature of L{FunctionFactory}
and/or function and the resulting L{Op}'s perform will do the same orig_function(inputs, outputs, **kwargs)
operation as::
function(inputs, outputs, **kwargs) TODO:
- examples for a multi-layer mlp. where?
Take note that the following options, if provided, must take the - __hash__, __eq__ otherwise won't merge, try gof.opt.is_same_graph_with_merge(op1.new_outputs, op2, new_outputs)
value(s) listed below: - c_code() to remove the double overhead?
unpack_single = False - opt to unfold it, work inplace on inputs
borrow_outputs = False - grad() make it support DisconnectedType and the new interface
- check how it work with updates.
OpFromGraph takes an additional input, grad_depth. If grad_depth - add test with constant as input or inside the inner graph.
is n, OpFromGraph will make special Ops for gradients up to the - Add support for the GPU? Probably just need an opt to remove transfer
nth level, allowing the user to differentiate this op up to n - Add support to pickle this Op.
times. The parameter defaults to 1. If grad_depth == 0, the op - Add support/test with random generator
will not be differentiable. :note:
- We support shared variable in the inner graph. This is automatic and
Example: invisible to the user. They can be as input to the node or in the
inner graph.
- We support unused inputs. This is needed for the grad.
Example 1:
.. code-block:: python
from theano import function, OpFromGraph, tensor
x, y, z = tensor.scalars('xyz') x, y, z = tensor.scalars('xyz')
e = x + y * z e = x + y * z
op = OpFromGraph([x, y, z], [e], linker='c') op = OpFromGraph([x, y, z], [e])
# op behaves like a normal theano op
e2 = op(x, y, z) + op(z, y, x)
fn = function([x, y, z], [e2])
Example 2 with shared variable:
.. code-block:: python
import numpy
import theano
from theano import config, function, OpFromGraph, tensor
x, y, z = tensor.scalars('xyz')
s = theano.shared(numpy.random.rand(2, 2).astype(config.floatX))
e = x + y * z + s
op = OpFromGraph([x, y, z], [e])
# op behaves like a normal theano op # op behaves like a normal theano op
e2 = op(x, y, z) + op(z, y, x) e2 = op(x, y, z) + op(z, y, x)
fn = function([x, y, z], [e2]) fn = function([x, y, z], [e2])
""" """
def __init__(self, inputs, outputs, grad_depth=1, **kwargs): def __init__(self, inputs, outputs, **kwargs):
if not isinstance(outputs, list): if not isinstance(outputs, list):
raise TypeError('outputs must be list', outputs) raise TypeError('outputs must be list', outputs)
for i in inputs + outputs: for i in inputs + outputs:
...@@ -44,34 +71,33 @@ class OpFromGraph(gof.Op): ...@@ -44,34 +71,33 @@ class OpFromGraph(gof.Op):
if 'updates' in kwargs: if 'updates' in kwargs:
raise TypeError('updates are not allowed in kwargs') raise TypeError('updates are not allowed in kwargs')
# TODO: the graph may have implicit inputs like # To support correctly shared variables the inner fct should
# SharedVariable instances. # not see them. Otherwise their is problem with the gradient.
# what impact to they have on the validity of this Op? self.shared_inputs = [var for var in gof.graph.inputs(outputs)
self.fn = orig_function(inputs, outputs, **kwargs) if isinstance(var, SharedVariable)]
used_inputs = [var for var in gof.graph.inputs(outputs)
if not isinstance(var, gof.Constant)]
shared_vars = [var.type() for var in self.shared_inputs]
new = rebuild_collect_shared(outputs, inputs=inputs + shared_vars,
replace=dict(zip(self.shared_inputs,
shared_vars)),
copy_inputs_over=False)
(new_inputs, new_outputs,
[clone_d, update_d, update_expr, shared_inputs]) = new
assert len(new_inputs) == len(inputs) + len(self.shared_inputs)
assert len(new_outputs) == len(outputs)
assert not update_d
assert not update_expr
assert not shared_inputs
self.new_inputs = new_inputs
self.new_outputs = new_outputs
self.inputs = inputs self.inputs = inputs
self.outputs = outputs self.outputs = outputs
self.kwargs = kwargs
self.input_types = [input.type for input in inputs] self.input_types = [input.type for input in inputs]
self.output_types = [output.type for output in outputs] self.output_types = [output.type for output in outputs]
if grad_depth > 0:
output_grads = [t() for t in self.output_types]
# OpFromGraph doesn't implement a connection_pattern, so for now we regard
# all inputs and outputs as connected. This will compute the right numerical
# value for the gradients but could fail to raise the disconnected inputs error
# in some cases.
gs = G.grad(cost=None, known_grads=dict(zip(self.outputs, output_grads)),
wrt=self.inputs, disconnected_inputs='ignore')
self.grad_ops = []
for g in gs:
if g is None:
self.grad_ops.append(lambda *args: None)
else:
# It is normal if some inputs are not needed in order
# to compute the gradient, so we ignore them.
self.grad_ops.append(OpFromGraph(inputs + output_grads,
[g],
grad_depth=grad_depth - 1,
on_unused_input='ignore'))
def __eq__(self, other): def __eq__(self, other):
#TODO: recognize a copy #TODO: recognize a copy
...@@ -87,9 +113,18 @@ class OpFromGraph(gof.Op): ...@@ -87,9 +113,18 @@ class OpFromGraph(gof.Op):
raise TypeError("Wrong type, expected %s but got %s" raise TypeError("Wrong type, expected %s but got %s"
% (type, input.type)) % (type, input.type))
return gof.Apply(self, return gof.Apply(self,
inputs, list(inputs) + self.shared_inputs,
[type() for type in self.output_types]) [type() for type in self.output_types])
def make_thunk(self, node, storage_map, compute_map, no_recycling):
ret = super(OpFromGraph, self).make_thunk(node, storage_map,
compute_map, no_recycling)
if not hasattr(self, "fn"):
self.fn = orig_function(self.new_inputs,
self.new_outputs,
**self.kwargs)
return ret
def perform(self, node, inputs, outputs): def perform(self, node, inputs, outputs):
variables = self.fn(*inputs) variables = self.fn(*inputs)
assert len(variables) == len(outputs) assert len(variables) == len(outputs)
...@@ -99,10 +134,32 @@ class OpFromGraph(gof.Op): ...@@ -99,10 +134,32 @@ class OpFromGraph(gof.Op):
output[0] = variable.copy() output[0] = variable.copy()
def grad(self, inputs, output_grads): def grad(self, inputs, output_grads):
if hasattr(self, 'grad_ops'): # OpFromGraph doesn't implement a connection_pattern, so for
return [go(*(inputs + output_grads)) for go in self.grad_ops] # now we regard all inputs and outputs as connected. This will
# compute the right numerical value for the gradients but
# could fail to raise the disconnected inputs error in some
# cases.
if hasattr(self, "grad_ops"):
grad_ops = self.grad_ops
else: else:
raise NotImplementedError gs = G.grad(cost=None,
known_grads=dict(zip(self.new_outputs, output_grads)),
wrt=self.new_inputs,
disconnected_inputs='ignore')
grad_ops = []
for g in gs:
if g is None:
grad_ops.append(lambda *args: None)
else:
# It is normal if some inputs are not needed in order
# to compute the gradient, so we ignore them.
grad_ops.append(OpFromGraph(self.new_inputs + output_grads,
[g],
on_unused_input='ignore'))
self.grad_ops = grad_ops
return [go(*(inputs + output_grads)) for go in grad_ops]
# Since OpFromGraph contains a Theano compiled function, we should let # Since OpFromGraph contains a Theano compiled function, we should let
# DebugMode know about it # DebugMode know about it
......
...@@ -1036,7 +1036,7 @@ class FunctionMaker(object): ...@@ -1036,7 +1036,7 @@ class FunctionMaker(object):
# initialize the linker # initialize the linker
if not hasattr(linker, 'accept'): if not hasattr(linker, 'accept'):
raise ValueError("'linker' parameter of FunctionFactory should be a Linker with an accept method " \ raise ValueError("'linker' parameter of FunctionMaker should be a Linker with an accept method " \
"or one of %s" % theano.compile.mode.predefined_linkers.keys()) "or one of %s" % theano.compile.mode.predefined_linkers.keys())
#the 'no_borrow' outputs are the ones for which that we can't return the internal storage pointer. #the 'no_borrow' outputs are the ones for which that we can't return the internal storage pointer.
......
import numpy import numpy
import unittest import unittest
from theano import config from theano import config, shared
from theano.compile import function from theano.compile import function
...@@ -17,7 +17,9 @@ class T_OpFromGraph(unittest.TestCase): ...@@ -17,7 +17,9 @@ class T_OpFromGraph(unittest.TestCase):
x, y, z = T.matrices('xyz') x, y, z = T.matrices('xyz')
e = x + y * z e = x + y * z
op = OpFromGraph([x, y, z], [e], mode='FAST_RUN') op = OpFromGraph([x, y, z], [e], mode='FAST_RUN')
f = op(x, y, z) - op(y, z, x) # (1+3*5=array of 16) - (3+1*5=array of 8) # (1+3*5=array of 16) - (3+1*5=array of 8)
f = op(x, y, z) - op(y, z, x)
fn = function([x, y, z], f) fn = function([x, y, z], f)
xv = numpy.ones((2, 2), dtype=config.floatX) xv = numpy.ones((2, 2), dtype=config.floatX)
yv = numpy.ones((2, 2), dtype=config.floatX)*3 yv = numpy.ones((2, 2), dtype=config.floatX)*3
...@@ -47,7 +49,7 @@ class T_OpFromGraph(unittest.TestCase): ...@@ -47,7 +49,7 @@ class T_OpFromGraph(unittest.TestCase):
def test_grad(self): def test_grad(self):
x, y, z = T.matrices('xyz') x, y, z = T.matrices('xyz')
e = x + y * z e = x + y * z
op = OpFromGraph([x, y, z], [e], mode='FAST_RUN', grad_depth=2) op = OpFromGraph([x, y, z], [e], mode='FAST_RUN')
f = op(x, y, z) f = op(x, y, z)
f = f - T.grad(T.sum(f), y) f = f - T.grad(T.sum(f), y)
fn = function([x, y, z], f) fn = function([x, y, z], f)
...@@ -56,6 +58,56 @@ class T_OpFromGraph(unittest.TestCase): ...@@ -56,6 +58,56 @@ class T_OpFromGraph(unittest.TestCase):
zv = numpy.ones((2, 2), dtype=config.floatX)*5 zv = numpy.ones((2, 2), dtype=config.floatX)*5
assert numpy.all(11.0 == fn(xv, yv, zv)) assert numpy.all(11.0 == fn(xv, yv, zv))
def test_grad_grad(self):
    """Check that the output of an OpFromGraph can be differentiated twice."""
    x, y, z = T.matrices('xyz')
    op = OpFromGraph([x, y, z], [x + y * z], mode='FAST_RUN')
    out = op(x, y, z)
    # Take the gradient w.r.t. y twice; each grad goes through the
    # OpFromGraph's own grad machinery.
    out = out - T.grad(T.sum(out), y)
    out = out - T.grad(T.sum(out), y)
    fn = function([x, y, z], out)
    x_val = numpy.ones((2, 2), dtype=config.floatX)
    y_val = numpy.ones((2, 2), dtype=config.floatX) * 3
    z_val = numpy.ones((2, 2), dtype=config.floatX) * 5
    assert numpy.allclose(6.0, fn(x_val, y_val, z_val))
def test_shared(self):
    """OpFromGraph whose inner graph references a shared variable."""
    x, y, z = T.matrices('xyz')
    s = shared(numpy.random.rand(2, 2).astype(config.floatX))
    op = OpFromGraph([x, y, z], [x + y * z + s], mode='FAST_RUN')
    # (1 + 3*5 = 16) - (3 + 1*5 = 8); the shared term cancels out.
    out = op(x, y, z) - op(y, z, x)
    fn = function([x, y, z], out)
    x_val = numpy.ones((2, 2), dtype=config.floatX)
    y_val = numpy.ones((2, 2), dtype=config.floatX) * 3
    z_val = numpy.ones((2, 2), dtype=config.floatX) * 5
    # Evaluate twice to make sure the cached inner function stays valid.
    assert numpy.allclose(8.0, fn(x_val, y_val, z_val))
    assert numpy.allclose(8.0, fn(x_val, y_val, z_val))
def test_shared_grad(self):
    """Gradient through an OpFromGraph containing a shared variable."""
    x, y, z = T.matrices('xyz')
    s = shared(numpy.random.rand(2, 2).astype(config.floatX))
    op = OpFromGraph([x, y, z], [x + y * z + s], mode='FAST_RUN')
    out = op(x, y, z)
    out = out - T.grad(T.sum(out), y)
    fn = function([x, y, z], out)
    x_val = numpy.ones((2, 2), dtype=config.floatX)
    y_val = numpy.ones((2, 2), dtype=config.floatX) * 3
    z_val = numpy.ones((2, 2), dtype=config.floatX) * 5
    assert numpy.allclose(11.0 + s.get_value(), fn(x_val, y_val, z_val))

    # Also take the gradient with respect to the shared variable itself.
    out = op(x, y, z)
    out = out - T.grad(T.sum(out), s)
    fn = function([x, y, z], out)
    assert numpy.allclose(15.0 + s.get_value(),
                          fn(x_val, y_val, z_val))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论