提交 924e9d03 authored 作者: Frederic's avatar Frederic

Fix grad of Alloc when we unbroadcast an input.

In some cases it caused an error in the gradient related to broadcasting. In the test, it produced a bad shape.
上级 8bd900f8
......@@ -2547,7 +2547,28 @@ class Alloc(gof.Op):
x = inputs[0]
gz = grads[0]
n_axes_to_sum = gz.ndim - x.ndim
gx = gz.sum(axis=range(n_axes_to_sum))
#The number of dimensions added
axis = range(n_axes_to_sum)
#The broadcasted dimensions
axis_broadcasted = []
for i, (ib, gb) in enumerate(
zip(inputs[0].broadcastable,
#We need the dimensions corresponding to x
grads[0].broadcastable[-inputs[0].ndim:])):
if ib and not gb:
axis_broadcasted.append(i + n_axes_to_sum)
gx = gz.sum(axis=axis + axis_broadcasted)
if axis_broadcasted:
new_order = list(x.broadcastable)
idx = 0
for i in range(x.ndim):
if not new_order[i]:
new_order[i] = idx
idx += 1
else:
new_order[i] = 'x'
gx = gx.dimshuffle(new_order)
#Dimshuffle to add back the broadcasted dims
#The *elements* of the output are not connected to
#the inputs that specify the shape. If you grow the
#shape by epsilon, the existing elements do not
......
......@@ -1787,10 +1787,16 @@ AllocTester = makeBroadcastTester(
correct01_bcast=(rand(1), numpy.int32(7)),
correct02=(rand(), numpy.int32(4), numpy.int32(7)),
correct12=(rand(7), numpy.int32(4), numpy.int32(7)),
correct13=(rand(7), numpy.int32(2), numpy.int32(
4), numpy.int32(7)),
correct23=(rand(4, 7), numpy.int32(2), numpy.
int32(4), numpy.int32(7)),
correct13=(rand(7), numpy.int32(2), numpy.int32(4),
numpy.int32(7)),
correct23=(rand(4, 7), numpy.int32(2), numpy.int32(4),
numpy.int32(7)),
correctb1=(rand(1, 7), numpy.int32(4), numpy.int32(7)),
correctb2=(rand(1, 7), numpy.int32(2),
numpy.int32(4), numpy.int32(7)),
correctb3=(rand(7, 1), numpy.int32(7), numpy.int32(4)),
correctb4=(rand(7, 1), numpy.int32(2),
numpy.int32(7), numpy.int32(4)),
),
bad_runtime=dict(
bad_shape12=(rand(7), numpy.int32(7), numpy.int32(5)),
......@@ -1839,6 +1845,54 @@ Alloc13GradTester = makeBroadcastTester(
),
)
# Gradient test: a row input (built with rand(1, s2)) is unbroadcast by
# alloc into an (s1, s2) matrix.
Allocb1GradTester = makeBroadcastTester(
    name='Allocb1GradTester',
    op=lambda x: alloc(x, s1, s2),
    # Reference value: x broadcast by NumPy against zeros of the target shape.
    expected=lambda x: numpy.zeros((s1, s2), dtype=x.dtype) + x,
    # Three cases, each with its own rand(1, s2) input.
    grad={
        'x1': (rand(1, s2),),
        'x2': (rand(1, s2),),
        'x3': (rand(1, s2),),
    },
)
# Gradient test: a row input (built with rand(1, s3)) is unbroadcast by
# alloc into an (s1, s2, s3) tensor3.
Allocb2GradTester = makeBroadcastTester(
    name='Allocb2GradTester',
    op=lambda x: alloc(x, s1, s2, s3),
    # Reference value: x broadcast by NumPy against zeros of the target shape.
    expected=lambda x: numpy.zeros((s1, s2, s3), dtype=x.dtype) + x,
    # Three cases, each with its own rand(1, s3) input.
    grad={
        'x1': (rand(1, s3),),
        'x2': (rand(1, s3),),
        'x3': (rand(1, s3),),
    },
)
# Gradient test: a column input (built with rand(s1, 1)) is unbroadcast by
# alloc into an (s1, s2) matrix.
Allocb3GradTester = makeBroadcastTester(
    name='Allocb3GradTester',
    op=lambda x: alloc(x, s1, s2),
    # Reference value: x broadcast by NumPy against zeros of the target shape.
    expected=lambda x: numpy.zeros((s1, s2), dtype=x.dtype) + x,
    # Three cases, each with its own rand(s1, 1) input.
    grad={
        'x1': (rand(s1, 1),),
        'x2': (rand(s1, 1),),
        'x3': (rand(s1, 1),),
    },
)
# Gradient test: a column input (built with rand(s2, 1)) is unbroadcast by
# alloc into an (s1, s2, s3) tensor3.
Allocb4GradTester = makeBroadcastTester(
    name='Allocb4GradTester',
    op=lambda x: alloc(x, s1, s2, s3),
    # Reference value: x broadcast by NumPy against zeros of the target shape.
    expected=lambda x: numpy.zeros((s1, s2, s3), dtype=x.dtype) + x,
    # Three cases, each with its own rand(s2, 1) input.
    grad={
        'x1': (rand(s2, 1),),
        'x2': (rand(s2, 1),),
        'x3': (rand(s2, 1),),
    },
)
class TestAlloc(unittest.TestCase):
dtype = config.floatX
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论