提交 24492f37 authored 作者: Reyhane Askari's avatar Reyhane Askari

Useless sum in grad removed and test added

上级 ecfc65ec
...@@ -700,7 +700,7 @@ second dimension ...@@ -700,7 +700,7 @@ second dimension
# we can sum over them # we can sum over them
# todo: only count dimensions that were effectively broadcasted # todo: only count dimensions that were effectively broadcasted
to_sum = [j for j, bcast in enumerate(ipt.type.broadcastable) to_sum = [j for j, bcast in enumerate(ipt.type.broadcastable)
if bcast] if bcast and not outs[0].broadcastable[j]]
if to_sum: if to_sum:
shuffle = [] shuffle = []
......
...@@ -4600,7 +4600,6 @@ class Canonizer(gof.LocalOptimizer): ...@@ -4600,7 +4600,6 @@ class Canonizer(gof.LocalOptimizer):
| x * y * z -> ([x, y, z], []) | x * y * z -> ([x, y, z], [])
""" """
# This function is recursive. The idea is that there is a # This function is recursive. The idea is that there is a
# get_num_denum recursion in which the internal ops are all # get_num_denum recursion in which the internal ops are all
# one of (main, inverse, reciprocal, DimShuffle) and the # one of (main, inverse, reciprocal, DimShuffle) and the
...@@ -6332,8 +6331,9 @@ def local_greedy_distributor(node): ...@@ -6332,8 +6331,9 @@ def local_greedy_distributor(node):
if candidate not in num: if candidate not in num:
continue continue
num.remove(candidate) num.remove(candidate)
_change, candidate, num, denum = attempt_distribution(candidate, _change, candidate, num, denum = attempt_distribution(
num, denum, out_type) candidate, num, denum, out_type,)
change |= _change change |= _change
new_num.append(candidate) new_num.append(candidate)
...@@ -6341,11 +6341,10 @@ def local_greedy_distributor(node): ...@@ -6341,11 +6341,10 @@ def local_greedy_distributor(node):
if candidate not in denum: if candidate not in denum:
continue continue
denum.remove(candidate) denum.remove(candidate)
_change, candidate, denum, num = attempt_distribution(candidate, _change, candidate, denum, num = attempt_distribution(
denum, num, out_type) candidate, denum, num, out_type)
change |= _change change |= _change
new_denum.append(candidate) new_denum.append(candidate)
if not change: if not change:
return False return False
......
...@@ -1242,6 +1242,36 @@ def test_clip_grad(): ...@@ -1242,6 +1242,36 @@ def test_clip_grad():
[numpy.asarray([-1., 0.5, 2.]), 0., 1.]) [numpy.asarray([-1., 0.5, 2.]), 0., 1.])
def test_grad_useless_sum():
    """Test absence of useless sum.

    When an operation (such as ``T.mul``) is done on a broadcastable
    vector and a matrix, the gradient in the backward path is computed for
    the broadcasted vector. So a sum reverts the broadcasted vector to a
    vector. In the case of operations on two vectors, the sum should not
    be generated.

    This test checks that no useless ``Sum`` op appears in the gradient
    graph, and that the gradient values themselves are correct.
    """
    x = tensor.TensorType(theano.config.floatX, (True,))('x')
    l = tensor.log(1.0 - tensor.nnet.sigmoid(x))[0]
    g = tensor.grad(l, x)

    # Inspect the symbolic gradient graph: since x is fully broadcastable
    # (length-1), summing over its broadcast dimension is a no-op and no
    # Sum op should have been introduced.
    nodes = theano.gof.graph.ops([x], [g])
    assert not any(isinstance(node.op, theano.tensor.elemwise.Sum)
                   for node in nodes)

    f = theano.function([x], g)
    test_values = [-100, -1, 0, 1, 100]
    # BUG FIX: cast inputs to x's actual dtype instead of hard-coded
    # 'float32', so the test also runs when floatX is float64.
    outputs = [f(numpy.array([value]).astype(x.dtype))
               for value in test_values]
    # BUG FIX: the original discarded numpy.allclose's return value, so
    # the numerical check never actually ran — assert it.
    assert numpy.allclose(outputs,
                          [[-3.72007598e-44],
                           [-0.26894142],
                           [-0.5],
                           [-0.73105858],
                           [-1.]])
def test_clip_grad_int(): def test_clip_grad_int():
# test that integers don't crash clip gradient # test that integers don't crash clip gradient
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论