提交 defa003a authored 作者: abergeron's avatar abergeron

Merge pull request #1683 from nouiz/opfromgraph

OpFromGraph documentation and cleanup
.. _opfromgraph:
===========
OpFromGraph
===========
This page describes :class:`theano.OpFromGraph
<theano.compile.builders.OpFromGraph>`, an Op that allows one to
encapsulate a Theano graph in an Op.
This can be used to encapsulate some functionality in one block. It is
useful for scaling Theano compilation of large, regular graphs in which
the encapsulated functionality is reused with different inputs many
times. Thanks to this encapsulation, it can make the Theano compilation
phase faster for graphs with many nodes.
Using this for small graphs is not recommended, as it disables
optimizations between what is inside the encapsulation and what is outside it.
.. note:
This hasn't been used widely so far. If you have any
questions/comments, do not hesitate to contact us on the mailing list.
.. autoclass:: theano.compile.builders.OpFromGraph
......@@ -9,8 +9,6 @@ from theano.compile.mode import *
from theano.compile.io import *
from theano.compile.builders import *
from theano.compile.module import *
from theano.compile.debugmode import DebugMode
......@@ -25,4 +23,6 @@ from theano.compile.sharedvalue import (shared, shared_constructor,
SharedVariable)
from theano.compile.pfunc import pfunc, Param, rebuild_collect_shared
from theano.compile.builders import *
from theano.compile.function import function
from theano import gof
from theano import gradient as G
from theano.compile.function_module import orig_function
from theano.compile import SharedVariable, rebuild_collect_shared
from theano.gof import ops_with_inner_function
class OpFromGraph(gof.Op):
"""
This create an L{Op} from a list of input variables and a list of output
variables.
The signature is the same as the signature of L{FunctionFactory}
and/or function and the resulting L{Op}'s perform will do the same
operation as::
function(inputs, outputs, **kwargs)
Take note that the following options, if provided, must take the
value(s) listed below:
unpack_single = False
borrow_outputs = False
OpFromGraph takes an additional input, grad_depth. If grad_depth
is n, OpFromGraph will make special Ops for gradients up to the
nth level, allowing the user to differentiate this op up to n
times. The parameter defaults to 1. If grad_depth == 0, the op
will not be differentiable.
Example:
x, y, z = tensor.scalars('xyz')
e = x + y * z
op = OpFromGraph([x, y, z], [e], linker='c')
# op behaves like a normal theano op
e2 = op(x, y, z) + op(z, y, x)
fn = function([x, y, z], [e2])
"""This create an `Op` from inputs and outputs list of variables.
The signature is similar to theano.function() and the resulting
`Op` perform will do the same operation as::
orig_function(inputs, outputs, **kwargs)
TODO:
- examples for a multi-layer mlp. where?
- __hash__, __eq__ otherwise won't merge, try gof.opt.is_same_graph_with_merge(op1.new_outputs, op2, new_outputs)
- c_code() to remove the double overhead?
- opt to unfold it, work inplace on inputs
- grad() make it support DisconnectedType and the new interface
- check how it work with updates.
- add test with constant as input or inside the inner graph.
- Add support for the GPU? Probably just need an opt to remove transfer
- Add support to pickle this Op.
- Add support/test with random generator
:note:
- We support shared variable in the inner graph. This is automatic and
invisible to the user. They can be as input to the node or in the
inner graph.
- We support unused inputs. This is needed for the grad.
Example 1:
.. code-block:: python
from theano import function, OpFromGraph, tensor
x, y, z = tensor.scalars('xyz')
e = x + y * z
op = OpFromGraph([x, y, z], [e])
# op behaves like a normal theano op
e2 = op(x, y, z) + op(z, y, x)
fn = function([x, y, z], [e2])
Example 2 with shared variable:
.. code-block:: python
import numpy
import theano
from theano import config, function, OpFromGraph, tensor
x, y, z = tensor.scalars('xyz')
s = theano.shared(numpy.random.rand(2, 2).astype(config.floatX))
e = x + y * z + s
op = OpFromGraph([x, y, z], [e])
# op behaves like a normal theano op
e2 = op(x, y, z) + op(z, y, x)
fn = function([x, y, z], [e2])
"""
def __init__(self, inputs, outputs, grad_depth=1, **kwargs):
def __init__(self, inputs, outputs, **kwargs):
if not isinstance(outputs, list):
raise TypeError('outputs must be list', outputs)
for i in inputs + outputs:
......@@ -44,34 +71,33 @@ class OpFromGraph(gof.Op):
if 'updates' in kwargs:
raise TypeError('updates are not allowed in kwargs')
# TODO: the graph may have implicit inputs like
# SharedVariable instances.
# what impact to they have on the validity of this Op?
self.fn = orig_function(inputs, outputs, **kwargs)
# To support correctly shared variables the inner fct should
# not see them. Otherwise their is problem with the gradient.
self.shared_inputs = [var for var in gof.graph.inputs(outputs)
if isinstance(var, SharedVariable)]
used_inputs = [var for var in gof.graph.inputs(outputs)
if not isinstance(var, gof.Constant)]
shared_vars = [var.type() for var in self.shared_inputs]
new = rebuild_collect_shared(outputs, inputs=inputs + shared_vars,
replace=dict(zip(self.shared_inputs,
shared_vars)),
copy_inputs_over=False)
(new_inputs, new_outputs,
[clone_d, update_d, update_expr, shared_inputs]) = new
assert len(new_inputs) == len(inputs) + len(self.shared_inputs)
assert len(new_outputs) == len(outputs)
assert not update_d
assert not update_expr
assert not shared_inputs
self.new_inputs = new_inputs
self.new_outputs = new_outputs
self.inputs = inputs
self.outputs = outputs
self.kwargs = kwargs
self.input_types = [input.type for input in inputs]
self.output_types = [output.type for output in outputs]
if grad_depth > 0:
output_grads = [t() for t in self.output_types]
# OpFromGraph doesn't implement a connection_pattern, so for now we regard
# all inputs and outputs as connected. This will compute the right numerical
# value for the gradients but could fail to raise the disconnected inputs error
# in some cases.
gs = G.grad(cost=None, known_grads=dict(zip(self.outputs, output_grads)),
wrt=self.inputs, disconnected_inputs='ignore')
self.grad_ops = []
for g in gs:
if g is None:
self.grad_ops.append(lambda *args: None)
else:
# It is normal if some inputs are not needed in order
# to compute the gradient, so we ignore them.
self.grad_ops.append(OpFromGraph(inputs + output_grads,
[g],
grad_depth=grad_depth - 1,
on_unused_input='ignore'))
def __eq__(self, other):
#TODO: recognize a copy
......@@ -87,9 +113,18 @@ class OpFromGraph(gof.Op):
raise TypeError("Wrong type, expected %s but got %s"
% (type, input.type))
return gof.Apply(self,
inputs,
list(inputs) + self.shared_inputs,
[type() for type in self.output_types])
def make_thunk(self, node, storage_map, compute_map, no_recycling):
    """Return the default thunk for this node, compiling the inner
    function on demand.

    The inner compiled function ``self.fn`` is built from
    ``self.new_inputs``/``self.new_outputs`` the first time a thunk is
    requested — presumably to defer the (expensive) inner compilation
    until the Op is actually used; TODO confirm against __init__.
    """
    ret = super(OpFromGraph, self).make_thunk(node, storage_map,
                                              compute_map, no_recycling)
    # Lazily compile the inner graph; `fn` may not exist yet.
    if not hasattr(self, "fn"):
        self.fn = orig_function(self.new_inputs,
                                self.new_outputs,
                                **self.kwargs)
    return ret
def perform(self, node, inputs, outputs):
variables = self.fn(*inputs)
assert len(variables) == len(outputs)
......@@ -99,10 +134,32 @@ class OpFromGraph(gof.Op):
output[0] = variable.copy()
def grad(self, inputs, output_grads):
if hasattr(self, 'grad_ops'):
return [go(*(inputs + output_grads)) for go in self.grad_ops]
# OpFromGraph doesn't implement a connection_pattern, so for
# now we regard all inputs and outputs as connected. This will
# compute the right numerical value for the gradients but
# could fail to raise the disconnected inputs error in some
# cases.
if hasattr(self, "grad_ops"):
grad_ops = self.grad_ops
else:
raise NotImplementedError
gs = G.grad(cost=None,
known_grads=dict(zip(self.new_outputs, output_grads)),
wrt=self.new_inputs,
disconnected_inputs='ignore')
grad_ops = []
for g in gs:
if g is None:
grad_ops.append(lambda *args: None)
else:
# It is normal if some inputs are not needed in order
# to compute the gradient, so we ignore them.
grad_ops.append(OpFromGraph(self.new_inputs + output_grads,
[g],
on_unused_input='ignore'))
self.grad_ops = grad_ops
return [go(*(inputs + output_grads)) for go in grad_ops]
# Since OpFromGraph contains a Theano compiled function, we should let
# DebugMode know about it
......
......@@ -1036,7 +1036,7 @@ class FunctionMaker(object):
# initialize the linker
if not hasattr(linker, 'accept'):
raise ValueError("'linker' parameter of FunctionFactory should be a Linker with an accept method " \
raise ValueError("'linker' parameter of FunctionMaker should be a Linker with an accept method " \
"or one of %s" % theano.compile.mode.predefined_linkers.keys())
#the 'no_borrow' outputs are the ones for which that we can't return the internal storage pointer.
......
import numpy
import unittest
from theano import config
from theano import config, shared
from theano.compile import function
......@@ -17,7 +17,9 @@ class T_OpFromGraph(unittest.TestCase):
x, y, z = T.matrices('xyz')
e = x + y * z
op = OpFromGraph([x, y, z], [e], mode='FAST_RUN')
f = op(x, y, z) - op(y, z, x) # (1+3*5=array of 16) - (3+1*5=array of 8)
# (1+3*5=array of 16) - (3+1*5=array of 8)
f = op(x, y, z) - op(y, z, x)
fn = function([x, y, z], f)
xv = numpy.ones((2, 2), dtype=config.floatX)
yv = numpy.ones((2, 2), dtype=config.floatX)*3
......@@ -47,7 +49,7 @@ class T_OpFromGraph(unittest.TestCase):
def test_grad(self):
x, y, z = T.matrices('xyz')
e = x + y * z
op = OpFromGraph([x, y, z], [e], mode='FAST_RUN', grad_depth=2)
op = OpFromGraph([x, y, z], [e], mode='FAST_RUN')
f = op(x, y, z)
f = f - T.grad(T.sum(f), y)
fn = function([x, y, z], f)
......@@ -56,6 +58,56 @@ class T_OpFromGraph(unittest.TestCase):
zv = numpy.ones((2, 2), dtype=config.floatX)*5
assert numpy.all(11.0 == fn(xv, yv, zv))
def test_grad_grad(self):
    """Differentiating through an OpFromGraph twice gives the right value."""
    a, b, c = T.matrices('xyz')
    op = OpFromGraph([a, b, c], [a + b * c], mode='FAST_RUN')
    expr = op(a, b, c)
    # Subtract d(sum(expr))/db twice: second-order use of the op's grad.
    for _ in range(2):
        expr = expr - T.grad(T.sum(expr), b)
    fn = function([a, b, c], expr)
    ones = numpy.ones((2, 2), dtype=config.floatX)
    # e - c - 0 = (1 + 3*5) - 5 - 5 = 6 elementwise.
    assert numpy.allclose(6.0, fn(ones, ones * 3, ones * 5))
def test_shared(self):
    """An OpFromGraph whose inner graph captures a shared variable.

    The shared variable is an implicit input of the op.  Evaluating the
    compiled function twice checks that the shared state is not
    corrupted between calls.
    """
    x, y, z = T.matrices('xyz')
    s = shared(numpy.random.rand(2, 2).astype(config.floatX))
    e = x + y * z + s
    op = OpFromGraph([x, y, z], [e], mode='FAST_RUN')
    # (1+3*5+s = array of 16+s) - (3+1*5+s = array of 8+s); s cancels.
    f = op(x, y, z) - op(y, z, x)
    fn = function([x, y, z], f)
    xv = numpy.ones((2, 2), dtype=config.floatX)
    yv = numpy.ones((2, 2), dtype=config.floatX) * 3
    zv = numpy.ones((2, 2), dtype=config.floatX) * 5
    assert numpy.allclose(8.0, fn(xv, yv, zv))
    # Second call must give the same result.
    assert numpy.allclose(8.0, fn(xv, yv, zv))
def test_shared_grad(self):
    """Gradients flow through an OpFromGraph that captures a shared var."""
    x, y, z = T.matrices('xyz')
    s = shared(numpy.random.rand(2, 2).astype(config.floatX))
    op = OpFromGraph([x, y, z], [x + y * z + s], mode='FAST_RUN')

    ones = numpy.ones((2, 2), dtype=config.floatX)
    xv, yv, zv = ones, ones * 3, ones * 5

    # Gradient with respect to an explicit input.
    out = op(x, y, z)
    compiled = function([x, y, z], out - T.grad(T.sum(out), y))
    assert numpy.allclose(11.0 + s.get_value(), compiled(xv, yv, zv))

    # Gradient with respect to the captured shared variable itself.
    out = op(x, y, z)
    compiled = function([x, y, z], out - T.grad(T.sum(out), s))
    assert numpy.allclose(15.0 + s.get_value(),
                          compiled(xv, yv, zv))
if __name__ == '__main__':
    # Allow running this test module directly from the command line.
    unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论