提交 eb1fbfd5 authored 作者: Frederic's avatar Frederic

Remove OpFromGraph grad_depth parameter.

上级 48e12b47
...@@ -12,13 +12,6 @@ class OpFromGraph(gof.Op): ...@@ -12,13 +12,6 @@ class OpFromGraph(gof.Op):
`Op` perform will do the same operation as:: `Op` perform will do the same operation as::
function(inputs, outputs, **kwargs) function(inputs, outputs, **kwargs)
OpFromGraph takes an additional input, grad_depth. If grad_depth
is n, OpFromGraph will make special Ops for gradients up to the
nth level, allowing the user to differentiate this op up to n
times. The parameter defaults to 1. If grad_depth == 0, the op
will not be differentiable.
Example: Example:
x, y, z = tensor.scalars('xyz') x, y, z = tensor.scalars('xyz')
e = x + y * z e = x + y * z
...@@ -34,10 +27,9 @@ class OpFromGraph(gof.Op): ...@@ -34,10 +27,9 @@ class OpFromGraph(gof.Op):
- c_code() to remove the double overhead? - c_code() to remove the double overhead?
- move call to function to make_thunk(). - move call to function to make_thunk().
- opt to unfold it, work inplace on inputs - opt to unfold it, work inplace on inputs
- move grad stuff from __init__ to grad()
""" """
def __init__(self, inputs, outputs, grad_depth=1, **kwargs): def __init__(self, inputs, outputs, **kwargs):
if not isinstance(outputs, list): if not isinstance(outputs, list):
raise TypeError('outputs must be list', outputs) raise TypeError('outputs must be list', outputs)
for i in inputs + outputs: for i in inputs + outputs:
...@@ -60,25 +52,6 @@ class OpFromGraph(gof.Op): ...@@ -60,25 +52,6 @@ class OpFromGraph(gof.Op):
self.input_types = [input.type for input in inputs] self.input_types = [input.type for input in inputs]
self.output_types = [output.type for output in outputs] self.output_types = [output.type for output in outputs]
if grad_depth > 0:
output_grads = [t() for t in self.output_types]
# OpFromGraph doesn't implement a connection_pattern, so for now we regard
# all inputs and outputs as connected. This will compute the right numerical
# value for the gradients but could fail to raise the disconnected inputs error
# in some cases.
gs = G.grad(cost=None, known_grads=dict(zip(self.outputs, output_grads)),
wrt=self.inputs, disconnected_inputs='ignore')
self.grad_ops = []
for g in gs:
if g is None:
self.grad_ops.append(lambda *args: None)
else:
# It is normal if some inputs are not needed in order
# to compute the gradient, so we ignore them.
self.grad_ops.append(OpFromGraph(inputs + output_grads,
[g],
grad_depth=grad_depth - 1,
on_unused_input='ignore'))
def __eq__(self, other): def __eq__(self, other):
#TODO: recognize a copy #TODO: recognize a copy
...@@ -106,10 +79,24 @@ class OpFromGraph(gof.Op): ...@@ -106,10 +79,24 @@ class OpFromGraph(gof.Op):
output[0] = variable.copy() output[0] = variable.copy()
def grad(self, inputs, output_grads):
    """Build the symbolic gradient of this op's outputs w.r.t. its inputs.

    The gradient of each input is computed on the inner graph with
    ``theano.gradient.grad`` (here ``G.grad``), then wrapped in a new
    ``OpFromGraph`` taking the original inputs plus the output gradients.
    Because each returned gradient is itself an ``OpFromGraph``, this op
    is differentiable to arbitrary order (the old ``grad_depth``
    parameter is no longer needed).

    Parameters
    ----------
    inputs : list of Variable
        The symbolic inputs at the call site of this op.
    output_grads : list of Variable
        Gradients of the cost w.r.t. each output of this op.

    Returns
    -------
    list of Variable
        One gradient term per input; ``None`` for inputs whose gradient
        is disconnected.
    """
    # OpFromGraph doesn't implement a connection_pattern, so for now we
    # regard all inputs and outputs as connected. This will compute the
    # right numerical value for the gradients but could fail to raise
    # the disconnected inputs error in some cases.
    gs = G.grad(cost=None,
                known_grads=dict(zip(self.outputs, output_grads)),
                wrt=self.inputs, disconnected_inputs='ignore')
    grad_ops = []
    for g in gs:
        if g is None:
            # Disconnected gradient: the caller expects None for it.
            grad_ops.append(lambda *args: None)
        else:
            # It is normal if some inputs are not needed in order
            # to compute the gradient, so we ignore them.
            grad_ops.append(OpFromGraph(self.inputs + output_grads,
                                        [g],
                                        on_unused_input='ignore'))
    return [go(*(inputs + output_grads)) for go in grad_ops]
# Since OpFromGraph contains a Theano compiled function, we should let # Since OpFromGraph contains a Theano compiled function, we should let
# DebugMode know about it # DebugMode know about it
......
...@@ -47,7 +47,7 @@ class T_OpFromGraph(unittest.TestCase): ...@@ -47,7 +47,7 @@ class T_OpFromGraph(unittest.TestCase):
def test_grad(self): def test_grad(self):
x, y, z = T.matrices('xyz') x, y, z = T.matrices('xyz')
e = x + y * z e = x + y * z
op = OpFromGraph([x, y, z], [e], mode='FAST_RUN', grad_depth=2) op = OpFromGraph([x, y, z], [e], mode='FAST_RUN')
f = op(x, y, z) f = op(x, y, z)
f = f - T.grad(T.sum(f), y) f = f - T.grad(T.sum(f), y)
fn = function([x, y, z], f) fn = function([x, y, z], f)
...@@ -56,6 +56,19 @@ class T_OpFromGraph(unittest.TestCase): ...@@ -56,6 +56,19 @@ class T_OpFromGraph(unittest.TestCase):
zv = numpy.ones((2, 2), dtype=config.floatX)*5 zv = numpy.ones((2, 2), dtype=config.floatX)*5
assert numpy.all(11.0 == fn(xv, yv, zv)) assert numpy.all(11.0 == fn(xv, yv, zv))
def test_grad_grad(self):
    """OpFromGraph must be differentiable twice now that grad() builds
    gradient ops on the fly (no grad_depth limit).

    With e = x + y*z and all-ones x, yv = 3, zv = 5:
      f0 = x + y*z = 6
      f1 = f0 - d(sum f0)/dy = 6 - 5 = 1
      f2 = f1 - d(sum f1)/dy = 1 - (5 - 0) ... evaluates to 6.0 elementwise.
    """
    x, y, z = T.matrices('xyz')
    e = x + y * z
    op = OpFromGraph([x, y, z], [e], mode='FAST_RUN')
    f = op(x, y, z)
    # Differentiate through the wrapped op twice in a row.
    f = f - T.grad(T.sum(f), y)
    f = f - T.grad(T.sum(f), y)
    fn = function([x, y, z], f)
    xv = numpy.ones((2, 2), dtype=config.floatX)
    yv = numpy.ones((2, 2), dtype=config.floatX) * 3
    zv = numpy.ones((2, 2), dtype=config.floatX) * 5
    # allclose (not exact equality) because floatX may be float32.
    assert numpy.allclose(6.0, fn(xv, yv, zv))
def test_shared(self): def test_shared(self):
x, y, z = T.matrices('xyz') x, y, z = T.matrices('xyz')
s = shared(numpy.random.rand(2, 2).astype(config.floatX)) s = shared(numpy.random.rand(2, 2).astype(config.floatX))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论