Fix grad of Alloc when we unbroadcast an input.

In some cases this caused an error in the gradient related to broadcasting. In the tests, it caused the gradient to have a bad shape.

Fix grad of Alloc when we unbroadcast an input.
924e9d03 · Frederic · 8bd900f8 · 924e9d03 · 924e9d03
--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -2547,7 +2547,28 @@ class Alloc(gof.Op):
        x = inputs[0]
        gz = grads[0]
        n_axes_to_sum = gz.ndim - x.ndim
-        gx = gz.sum(axis=range(n_axes_to_sum))
+        #The number of dimensions added
+        axis = range(n_axes_to_sum)
+        #The broadcasted dimensions
+        axis_broadcasted = []
+        for i, (ib, gb) in enumerate(
+            zip(inputs[0].broadcastable,
+                #We need the dimensions corresponding to x
+                grads[0].broadcastable[-inputs[0].ndim:])):
+            if ib and not gb:
+                axis_broadcasted.append(i + n_axes_to_sum)
+        gx = gz.sum(axis=axis + axis_broadcasted)
+        if axis_broadcasted:
+            new_order = list(x.broadcastable)
+            idx = 0
+            for i in range(x.ndim):
+                if not new_order[i]:
+                    new_order[i] = idx
+                    idx += 1
+                else:
+                    new_order[i] = 'x'
+            gx = gx.dimshuffle(new_order)
+            #Dimshuffle to add back the broadcasted dims
        #The *elements* of the output are not connected to
        #the inputs that specify the shape. If you grow the
        #shape by epsilon, the existing elements do not

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -1787,10 +1787,16 @@ AllocTester = makeBroadcastTester(
            correct01_bcast=(rand(1), numpy.int32(7)),
            correct02=(rand(), numpy.int32(4), numpy.int32(7)),
            correct12=(rand(7), numpy.int32(4), numpy.int32(7)),
-            correct13=(rand(7), numpy.int32(2), numpy.int32(
-                4), numpy.int32(7)),
-            correct23=(rand(4, 7), numpy.int32(2), numpy.
-                int32(4), numpy.int32(7)),
+            correct13=(rand(7), numpy.int32(2), numpy.int32(4),
+                       numpy.int32(7)),
+            correct23=(rand(4, 7), numpy.int32(2), numpy.int32(4),
+                       numpy.int32(7)),
+            correctb1=(rand(1, 7), numpy.int32(4), numpy.int32(7)),
+            correctb2=(rand(1, 7), numpy.int32(2),
+                       numpy.int32(4), numpy.int32(7)),
+            correctb3=(rand(7, 1), numpy.int32(7), numpy.int32(4)),
+            correctb4=(rand(7, 1), numpy.int32(2),
+                       numpy.int32(7), numpy.int32(4)),
            ),
        bad_runtime=dict(
                    bad_shape12=(rand(7), numpy.int32(7), numpy.int32(5)),
@@ -1839,6 +1845,54 @@ Alloc13GradTester = makeBroadcastTester(
            ),
        )

+# unbroadcast a row to a matrix
+Allocb1GradTester = makeBroadcastTester(
+    name='Allocb1GradTester',
+    op=lambda x: alloc(x, s1, s2),
+    expected=(lambda x: numpy.zeros((s1, s2), dtype=x.dtype) + x),
+    grad=dict(
+        x1=(rand(1, s2),),
+        x2=(rand(1, s2),),
+        x3=(rand(1, s2),),
+    ),
+)
+
+# unbroadcast a row to a tensor3
+Allocb2GradTester = makeBroadcastTester(
+    name='Allocb2GradTester',
+    op=lambda x: alloc(x, s1, s2, s3),
+    expected=(lambda x: numpy.zeros((s1, s2, s3), dtype=x.dtype) + x),
+    grad=dict(
+        x1=(rand(1, s3),),
+        x2=(rand(1, s3),),
+        x3=(rand(1, s3),),
+    ),
+)
+
+# unbroadcast a col to a matrix
+Allocb3GradTester = makeBroadcastTester(
+    name='Allocb3GradTester',
+    op=lambda x: alloc(x, s1, s2),
+    expected=(lambda x: numpy.zeros((s1, s2), dtype=x.dtype) + x),
+    grad=dict(
+        x1=(rand(s1, 1),),
+        x2=(rand(s1, 1),),
+        x3=(rand(s1, 1),),
+    ),
+)
+
+# unbroadcast a col to a tensor3
+Allocb4GradTester = makeBroadcastTester(
+    name='Allocb4GradTester',
+    op=lambda x: alloc(x, s1, s2, s3),
+    expected=(lambda x: numpy.zeros((s1, s2, s3), dtype=x.dtype) + x),
+    grad=dict(
+        x1=(rand(s2, 1),),
+        x2=(rand(s2, 1),),
+        x3=(rand(s2, 1),),
+    ),
+)
+

 class TestAlloc(unittest.TestCase):
    dtype = config.floatX