提交 c0fda9c0 authored 作者: khaotik's avatar khaotik 提交者: khaotik

grad_overrides now use syntax as in docstring

上级 7608602e
...@@ -69,7 +69,7 @@ from theano.compile import ( ...@@ -69,7 +69,7 @@ from theano.compile import (
Mode, Mode,
predefined_modes, predefined_linkers, predefined_optimizers, predefined_modes, predefined_linkers, predefined_optimizers,
FunctionMaker, function, function_dump, FunctionMaker, function, function_dump,
OpFromGraph, OpFromGrpahInline, OpFromGraphPrecompiled, op_from_graph OpFromGraph, OpFromGraphInline, OpFromGraphPrecompiled, op_from_graph,
ProfileStats, ProfileStats,
Param, shared, as_op) Param, shared, as_op)
......
...@@ -69,7 +69,7 @@ class OpFromGraphBase(gof.Op): ...@@ -69,7 +69,7 @@ class OpFromGraphBase(gof.Op):
def grad(self, inputs, output_grads): def grad(self, inputs, output_grads):
if self.cached_grad_ops: if self.cached_grad_ops:
return self.grad_ops(inputs+output_grads) return self.grad_ops(inputs, output_grads)
grad_inps = self.internal_inputs + output_grads grad_inps = self.internal_inputs + output_grads
upstream_grads = dict(izip(self.internal_outputs, output_grads)) upstream_grads = dict(izip(self.internal_outputs, output_grads))
...@@ -84,17 +84,18 @@ class OpFromGraphBase(gof.Op): ...@@ -84,17 +84,18 @@ class OpFromGraphBase(gof.Op):
# to compute the gradient, so we ignore them. # to compute the gradient, so we ignore them.
gs = [go if go else type(self)( gs = [go if go else type(self)(
grad_inps, grad_inps,
theano.gradient.grad( (lambda g: g if g else (lambda *a:None))(
cost=None, theano.gradient.grad(
known_grads=upstream_grads, cost=None,
wrt=[inp], known_grads=upstream_grads,
disconnected_inputs='ignore'), wrt=[inp],
on_unused_input='ignore' disconnected_inputs='ignore')
), on_unused_input='ignore'
) for go, inp in izip(grad_ops_l, self.internal_inputs)] ) for go, inp in izip(grad_ops_l, self.internal_inputs)]
# since OpFromGraphBase only accepts and outputs list, # since OpFromGraphBase only accepts input sequence,
# additional filtering is needed # additional filtering is needed
grad_ops = lambda inps:[ grad_ops = lambda inps,grds:[
(go(inps) if ov else go(*inps)) (go(inps, grds) if ov else go(*(inps+grds)))
for go, ov in izip(gs, grad_ops_l)] for go, ov in izip(gs, grad_ops_l)]
else: else:
grad_ops = grad_ops_l grad_ops = grad_ops_l
...@@ -113,10 +114,10 @@ class OpFromGraphBase(gof.Op): ...@@ -113,10 +114,10 @@ class OpFromGraphBase(gof.Op):
grad_ops_l.append(type(self)(grad_inps, grad_ops_l.append(type(self)(grad_inps,
[g], [g],
on_unused_input='ignore')) on_unused_input='ignore'))
grad_ops = lambda inps:[go(*inps) for go in grad_ops_l] grad_ops = lambda inps, grds:[go(*(inps+grds)) for go in grad_ops_l]
self.grad_ops = grad_ops self.grad_ops = grad_ops
self.cached_grad_ops = True self.cached_grad_ops = True
return grad_ops(inputs+output_grads) return grad_ops(inputs, output_grads)
def make_node(self, *inputs): def make_node(self, *inputs):
for input, type in zip(inputs, self.input_types): for input, type in zip(inputs, self.input_types):
...@@ -191,6 +192,8 @@ class OpFromGraphInline(OpFromGraphBase): ...@@ -191,6 +192,8 @@ class OpFromGraphInline(OpFromGraphBase):
@gof.local_optimizer([OpFromGraphInline]) @gof.local_optimizer([OpFromGraphInline])
def inline_ofg_expansion(node): def inline_ofg_expansion(node):
""" This optimization expands internal graph of OpFromGraphInline
"""
op = node.op op = node.op
if not isinstance(op, OpFromGraphInline): if not isinstance(op, OpFromGraphInline):
return False return False
...@@ -205,6 +208,8 @@ optdb.register( ...@@ -205,6 +208,8 @@ optdb.register(
gof.opt.in2out(inline_ofg_expansion), gof.opt.in2out(inline_ofg_expansion),
0.5, 'fast_compile', 'fast_run') 0.5, 'fast_compile', 'fast_run')
# Since OpFromGraphPrecompiled contains a Theano compiled function,
# we should let DebugMode know about it
ops_with_inner_function[OpFromGraphPrecompiled] = 'fn' ops_with_inner_function[OpFromGraphPrecompiled] = 'fn'
# for backward compatibility # for backward compatibility
...@@ -227,16 +232,18 @@ def op_from_graph( ...@@ -227,16 +232,18 @@ def op_from_graph(
inputs: list of variables inputs: list of variables
outputs: list of variables outputs: list of variables
inline: bool inline: bool, optional
if True, will cause the Op's original graph being used during if True, will cause the Op's original graph being used during
compilation, otherwise will use a pre-compiled function inside. compilation, otherwise will use a pre-compiled function inside.
grad_overrides: None | function | list of (None|function) grad_overrides: None | function | list of (None|function), optional
Used to override default gradient routine. Used to override default gradient routine.
Overriding function must take two lists as inputs Overriding function(s) must take two lists of variables as inputs,
and upstream gradients the original inputs and upstream gradients
If is None, will use default gradient routine. For different `grad_overrides`:
If is function, must return list of Variable.
If is list, each function must return a single Variable. The order - `None` : will use default gradient routine.
- function : must return list of Variable.
- list : each function must return a single Variable. The order
of the list must correspond to inputs of the list must correspond to inputs
Notes Notes
...@@ -263,7 +270,7 @@ def op_from_graph( ...@@ -263,7 +270,7 @@ def op_from_graph(
invisible to the user. They can be as input to the node or in the invisible to the user. They can be as input to the node or in the
inner graph. inner graph.
- We support unused inputs. This is needed for the grad. - We support unused inputs. This is needed for the grad.
- inline=True will cause better optimization at the cost of longer - `inline=True` will cause better runtime optimization at the cost of longer
compilation, only works with optimizer "fast_run" or "fast_compile" compilation, only works with optimizer "fast_run" or "fast_compile"
Examples Examples
...@@ -307,12 +314,12 @@ def op_from_graph( ...@@ -307,12 +314,12 @@ def op_from_graph(
x, y, z = inps x, y, z = inps
g = grads g = grads
return z*2 return z*2
op = op_from_graph( op = op_from_graph(
[x, y, z], [e], grad_overrides=[None, rescale_dy, None]) [x, y, z], [e], grad_overrides=[None, rescale_dy, None])
e2 = op(x, y, z) e2 = op(x, y, z)
dx, dy, dz = grad(e2, [x, y, z]) dx, dy, dz = grad(e2, [x, y, z])
fn = function([x, y, z], [dx, dy, dz]) fn = function([x, y, z], [dx, dy, dz])
# the gradient wrt y is now doubled
fn(2., 3., 4.) # [1., 8., 3.] fn(2., 3., 4.) # [1., 8., 3.]
""" """
......
...@@ -124,12 +124,13 @@ class T_OpFromGraph(unittest_tools.InferShapeTester): ...@@ -124,12 +124,13 @@ class T_OpFromGraph(unittest_tools.InferShapeTester):
def test_grad_override(self, cls_ofg): def test_grad_override(self, cls_ofg):
x,y = T.vectors('xy') x,y = T.vectors('xy')
def go(args): def go(inps, gs):
x, y, g = args x, y = inps
g = gs[0]
return [g*y*2, g*x*1.5] return [g*y*2, g*x*1.5]
# no override is covered in "grad" test # no override case is covered in "grad" test
# single override # single override case
op_mul = cls_ofg([x, y], [x*y], grad_overrides=go) op_mul = cls_ofg([x, y], [x*y], grad_overrides=go)
xx,yy = T.vector('xx'), T.vector('yy') xx,yy = T.vector('xx'), T.vector('yy')
zz = T.sum(op_mul(xx,yy)) zz = T.sum(op_mul(xx,yy))
...@@ -141,13 +142,15 @@ class T_OpFromGraph(unittest_tools.InferShapeTester): ...@@ -141,13 +142,15 @@ class T_OpFromGraph(unittest_tools.InferShapeTester):
assert numpy.allclose(yv*2, dxv) assert numpy.allclose(yv*2, dxv)
assert numpy.allclose(xv*1.5, dyv) assert numpy.allclose(xv*1.5, dyv)
# list override # list override case
def go1(args): def go1(inps, gs):
x, w, b, g = args x, w, b = inps
g = gs[0]
return g*w*2 return g*w*2
def go2(args): def go2(inps, gs):
x, w, b, g = args x, w, b = inps
g = gs[0]
return g*x*1.5 return g*x*1.5
w, b = T.vectors('wb') w, b = T.vectors('wb')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论