提交 85c9686e authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #1585 from nouiz/alloc_unbroadcast_grad

Fix grad of Alloc when we unbroadcast an input.
......@@ -2,6 +2,7 @@ global-include *.txt
global-include *.cu
global-include *.cuh
global-include *.sh
global-include *.pkl
recursive-include docs
include bin/theano-cache
include bin/theano-nose
......
......@@ -190,7 +190,7 @@ def do_setup():
packages=find_packages(),
install_requires=['numpy>=1.5.0', 'scipy>=0.7.2'],
package_data={
'': ['*.txt', '*.rst', '*.cu', '*.cuh', '*.c', '*.sh',
'': ['*.txt', '*.rst', '*.cu', '*.cuh', '*.c', '*.sh', '*.pkl',
'ChangeLog'],
'theano.misc': ['*.sh']
},
......
......@@ -2547,7 +2547,28 @@ class Alloc(gof.Op):
x = inputs[0]
gz = grads[0]
n_axes_to_sum = gz.ndim - x.ndim
gx = gz.sum(axis=range(n_axes_to_sum))
#The number of dimensions added
axis = range(n_axes_to_sum)
#The broadcasted dimensions
axis_broadcasted = []
for i, (ib, gb) in enumerate(
zip(inputs[0].broadcastable,
#We need the dimensions corresponding to x
grads[0].broadcastable[-inputs[0].ndim:])):
if ib and not gb:
axis_broadcasted.append(i + n_axes_to_sum)
gx = gz.sum(axis=axis + axis_broadcasted)
if axis_broadcasted:
new_order = list(x.broadcastable)
idx = 0
for i in range(x.ndim):
if not new_order[i]:
new_order[i] = idx
idx += 1
else:
new_order[i] = 'x'
gx = gx.dimshuffle(new_order)
#Dimshuffle to add back the broadcasted dims
#The *elements* of the output are not connected to
#the inputs that specify the shape. If you grow the
#shape by epsilon, the existing elements do not
......
......@@ -1787,10 +1787,16 @@ AllocTester = makeBroadcastTester(
correct01_bcast=(rand(1), numpy.int32(7)),
correct02=(rand(), numpy.int32(4), numpy.int32(7)),
correct12=(rand(7), numpy.int32(4), numpy.int32(7)),
correct13=(rand(7), numpy.int32(2), numpy.int32(
4), numpy.int32(7)),
correct23=(rand(4, 7), numpy.int32(2), numpy.
int32(4), numpy.int32(7)),
correct13=(rand(7), numpy.int32(2), numpy.int32(4),
numpy.int32(7)),
correct23=(rand(4, 7), numpy.int32(2), numpy.int32(4),
numpy.int32(7)),
correctb1=(rand(1, 7), numpy.int32(4), numpy.int32(7)),
correctb2=(rand(1, 7), numpy.int32(2),
numpy.int32(4), numpy.int32(7)),
correctb3=(rand(7, 1), numpy.int32(7), numpy.int32(4)),
correctb4=(rand(7, 1), numpy.int32(2),
numpy.int32(7), numpy.int32(4)),
),
bad_runtime=dict(
bad_shape12=(rand(7), numpy.int32(7), numpy.int32(5)),
......@@ -1839,6 +1845,54 @@ Alloc13GradTester = makeBroadcastTester(
),
)
# Gradient of alloc when a broadcastable row (1, s2) is grown to an
# (s1, s2) matrix: the grad must sum over the unbroadcast dimension.
Allocb1GradTester = makeBroadcastTester(
    name='Allocb1GradTester',
    op=lambda x: alloc(x, s1, s2),
    # Broadcasting x against a zero matrix reproduces what alloc builds.
    expected=(lambda x: x + numpy.zeros((s1, s2), dtype=x.dtype)),
    # Three independent random row inputs, keyed x1..x3.
    grad=dict(('x%d' % i, (rand(1, s2),)) for i in (1, 2, 3)),
)
# Gradient of alloc when a broadcastable row (1, s3) is grown to an
# (s1, s2, s3) tensor3: the grad must sum over the unbroadcast dimensions.
Allocb2GradTester = makeBroadcastTester(
    name='Allocb2GradTester',
    op=lambda x: alloc(x, s1, s2, s3),
    # Broadcasting x against a zero tensor reproduces what alloc builds.
    expected=(lambda x: x + numpy.zeros((s1, s2, s3), dtype=x.dtype)),
    # Three independent random row inputs, keyed x1..x3.
    grad=dict(('x%d' % i, (rand(1, s3),)) for i in (1, 2, 3)),
)
# Gradient of alloc when a broadcastable column (s1, 1) is grown to an
# (s1, s2) matrix: the grad must sum over the unbroadcast dimension.
Allocb3GradTester = makeBroadcastTester(
    name='Allocb3GradTester',
    op=lambda x: alloc(x, s1, s2),
    # Broadcasting x against a zero matrix reproduces what alloc builds.
    expected=(lambda x: x + numpy.zeros((s1, s2), dtype=x.dtype)),
    # Three independent random column inputs, keyed x1..x3.
    grad=dict(('x%d' % i, (rand(s1, 1),)) for i in (1, 2, 3)),
)
# Gradient of alloc when a broadcastable column (s2, 1) is grown to an
# (s1, s2, s3) tensor3: the grad must sum over the unbroadcast dimensions.
Allocb4GradTester = makeBroadcastTester(
    name='Allocb4GradTester',
    op=lambda x: alloc(x, s1, s2, s3),
    # Broadcasting x against a zero tensor reproduces what alloc builds.
    expected=(lambda x: x + numpy.zeros((s1, s2, s3), dtype=x.dtype)),
    # Three independent random column inputs, keyed x1..x3.
    grad=dict(('x%d' % i, (rand(s2, 1),)) for i in (1, 2, 3)),
)
class TestAlloc(unittest.TestCase):
dtype = config.floatX
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论