Merge pull request #2844 from nouiz/alloc_crash

Fix crash in Alloc grad

Merge pull request #2844 from nouiz/alloc_crash
92664b48 · abergeron · 1547ecc6 · 879faed6 · 92664b48 · 92664b48
--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -2520,22 +2520,20 @@ class Alloc(gof.Op):
        axis = range(n_axes_to_sum)
        # The broadcasted dimensions
        axis_broadcasted = []
+        axis_kept = []
        for i, (ib, gb) in enumerate(
            zip(inputs[0].broadcastable,
                # We need the dimensions corresponding to x
                grads[0].broadcastable[-inputs[0].ndim:])):
            if ib and not gb:
                axis_broadcasted.append(i + n_axes_to_sum)
+            else:
+                axis_kept.append(i)
        gx = gz.sum(axis=axis + axis_broadcasted)
        if axis_broadcasted:
-            new_order = list(x.broadcastable)
-            idx = 0
-            for i in range(x.ndim):
-                if not new_order[i]:
-                    new_order[i] = idx
-                    idx += 1
-                else:
-                    new_order[i] = 'x'
+            new_order = ['x'] * x.ndim
+            for idx, axis in enumerate(axis_kept):
+                new_order[axis] = idx
            gx = gx.dimshuffle(new_order)
            # Dimshuffle to add back the broadcasted dims
        # The *elements* of the output are not connected to

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -2065,6 +2065,29 @@ Allocb4GradTester = makeBroadcastTester(
 )


+# Partial unbroadcast of a dimshuffled input
+AllocDimshuffleGradTester = makeBroadcastTester(
+    name='AllocDimshuffleGradTester',  # mirrors the variable it is bound to
+    op=lambda x: alloc(x.dimshuffle('x', 'x', 0), 1, s2, s3),
+    expected=(lambda x: numpy.zeros((1, s2, s3), dtype=x.dtype) + x),
+    grad=dict(
+        x1=(rand(s3),),
+        x2=(rand(s3),),
+        x3=(rand(s3),),
+    ),
+)
+AllocDimshuffleGradTester2 = makeBroadcastTester(
+    name='AllocDimshuffleGradTester2',  # mirrors the variable it is bound to
+    op=lambda x: alloc(x.dimshuffle('x', 0), 1, s2, s3),
+    expected=(lambda x: numpy.zeros((1, s2, s3), dtype=x.dtype) + x),
+    grad=dict(
+        x1=(rand(s3),),
+        x2=(rand(s3),),
+        x3=(rand(s3),),
+    ),
+)
+
+
 class ApplyDefaultTestOp(theano.Op):
    def __init__(self, id):
        self.default_output = id