提交 b1f1e62a authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #1649 from lamblin/fix_stack_grad

Fix other crash in hessian of stack()
...@@ -1636,12 +1636,17 @@ def hessian(cost, wrt, consider_constant=None, ...@@ -1636,12 +1636,17 @@ def hessian(cost, wrt, consider_constant=None,
assert input.ndim == 1, \ assert input.ndim == 1, \
"tensor.hessian expects a (list of) 1 dimensional variable "\ "tensor.hessian expects a (list of) 1 dimensional variable "\
"as `wrt`" "as `wrt`"
expr = grad(cost, input) expr = grad(cost, input, consider_constant=consider_constant,
disconnected_inputs=disconnected_inputs)
# It is possible that the inputs are disconnected from expr,
# even if they are connected to cost.
# This should not be an error.
hess, updates = theano.scan(lambda i, y, x: grad( hess, updates = theano.scan(lambda i, y, x: grad(
y[i], y[i],
x, x,
consider_constant=consider_constant, consider_constant=consider_constant,
disconnected_inputs=disconnected_inputs), disconnected_inputs='ignore'),
sequences=arange(expr.shape[0]), sequences=arange(expr.shape[0]),
non_sequences=[expr, input]) non_sequences=[expr, input])
assert not updates, \ assert not updates, \
......
...@@ -3073,9 +3073,42 @@ class T_Join_and_Split(unittest.TestCase): ...@@ -3073,9 +3073,42 @@ class T_Join_and_Split(unittest.TestCase):
# Test the gradient of stack when used in hessian, see gh-1589 # Test the gradient of stack when used in hessian, see gh-1589
a = tensor.dvector('a') a = tensor.dvector('a')
b = tensor.dvector('b') b = tensor.dvector('b')
A = stack([a, b]) A = stack(a, b)
B = A.T.dot(A) B = A.T.dot(A)
hessian(B.sum(), [a, b]) Ha, Hb = hessian(B.sum(), [a, b])
# Try some values
a_v = numpy.random.rand(4)
b_v = numpy.random.rand(4)
f = theano.function([a, b], [Ha, Hb])
Ha_v, Hb_v = f(a_v, b_v)
# The Hessian is always a matrix full of 2
assert Ha_v.shape == (4, 4)
assert Hb_v.shape == (4, 4)
assert numpy.allclose(Ha_v, 2.)
assert numpy.allclose(Hb_v, 2.)
def test_stack_hessian2(self):
    """Regression test for gh-1649: hessian() must not crash when the
    gradient of the cost is constant with respect to the input.

    The cost ``A.sum()`` is linear in ``a`` and ``b``, so its gradient
    does not depend on them and the Hessian is identically zero.  The
    inner ``grad()`` call performed by ``hessian()`` must therefore
    tolerate disconnected inputs instead of raising.
    """
    a = tensor.dvector('a')
    b = tensor.dvector('b')
    A = stack([a, b])
    Ha, Hb = hessian(A.sum(), [a, b])
    # Evaluate on concrete values to make sure the graph compiles and
    # produces the expected zeros.
    a_v = numpy.random.rand(4)
    b_v = numpy.random.rand(4)
    f = theano.function([a, b], [Ha, Hb])
    Ha_v, Hb_v = f(a_v, b_v)
    # The Hessian of a linear cost is a matrix full of 0.
    assert Ha_v.shape == (4, 4)
    assert Hb_v.shape == (4, 4)
    assert numpy.allclose(Ha_v, 0.)
    assert numpy.allclose(Hb_v, 0.)
def test_join_concatenate_one_element(self): def test_join_concatenate_one_element(self):
''' Fast test of concatenate as this is an alias for join. ''' Fast test of concatenate as this is an alias for join.
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论