Commit 24492f37, authored by Reyhane Askari (avatar: Reyhane Askari)

Useless sum in grad removed and test added

Parent commit: ecfc65ec
......@@ -700,7 +700,7 @@ second dimension
# we can sum over them
# todo: only count dimensions that were effectively broadcasted
to_sum = [j for j, bcast in enumerate(ipt.type.broadcastable)
if bcast]
if bcast and not outs[0].broadcastable[j]]
if to_sum:
shuffle = []
......
......@@ -4600,7 +4600,6 @@ class Canonizer(gof.LocalOptimizer):
| x * y * z -> ([x, y, z], [])
"""
# This function is recursive. The idea is that there is a
# get_num_denum recursion in which the internal ops are all
# one of (main, inverse, reciprocal, DimShuffle) and the
......@@ -6332,8 +6331,9 @@ def local_greedy_distributor(node):
if candidate not in num:
continue
num.remove(candidate)
_change, candidate, num, denum = attempt_distribution(candidate,
num, denum, out_type)
_change, candidate, num, denum = attempt_distribution(
candidate, num, denum, out_type,)
change |= _change
new_num.append(candidate)
......@@ -6341,11 +6341,10 @@ def local_greedy_distributor(node):
if candidate not in denum:
continue
denum.remove(candidate)
_change, candidate, denum, num = attempt_distribution(candidate,
denum, num, out_type)
_change, candidate, denum, num = attempt_distribution(
candidate, denum, num, out_type)
change |= _change
new_denum.append(candidate)
if not change:
return False
......
......@@ -1242,6 +1242,36 @@ def test_clip_grad():
[numpy.asarray([-1., 0.5, 2.]), 0., 1.])
def test_grad_useless_sum():
    """Test absence of useless sum.

    When an operation (such as ``T.mul``) is done on a broadcastable
    vector and a matrix, the gradient in the backward path is computed
    for the broadcasted vector. So a sum reverts the broadcasted vector
    back to a vector. In the case of operations on two vectors, the sum
    should not be generated. This test checks that there is no useless
    sum in the gradient computations.
    """
    x = tensor.TensorType(theano.config.floatX, (True,))('x')
    l = tensor.log(1.0 - tensor.nnet.sigmoid(x))[0]
    g = tensor.grad(l, x)

    # Both operands of the elemwise ops are (broadcastable) vectors, so
    # no broadcast needs to be undone: the gradient graph must not
    # contain any Sum node.
    nodes = theano.gof.graph.ops([x], [g])
    assert not any(isinstance(node.op, theano.tensor.elemwise.Sum)
                   for node in nodes)

    f = theano.function([x], g)
    # Use floatX consistently with the input variable created above,
    # instead of a hard-coded 'float32'.
    test_values = [-100, -1, 0, 1, 100]
    outputs = [f(numpy.array([value]).astype(theano.config.floatX))
               for value in test_values]
    # d/dx log(1 - sigmoid(x)) == -sigmoid(x); the original code dropped
    # the result of allclose, so the numerical check never ran — assert it.
    assert numpy.allclose(outputs, [[-3.72007598e-44],
                                    [-0.26894142],
                                    [-0.5],
                                    [-0.73105858],
                                    [-1.]])
def test_clip_grad_int():
# test that integers don't crash clip gradient
......
Markdown format
0%
You are about to add 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment