提交 85c9686e authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #1585 from nouiz/alloc_unbroadcast_grad

Fix grad of Alloc when we unbroadcast an input.
......@@ -2,6 +2,7 @@ global-include *.txt
global-include *.cu
global-include *.cuh
global-include *.sh
global-include *.pkl
recursive-include docs
include bin/theano-cache
include bin/theano-nose
......
......@@ -190,7 +190,7 @@ def do_setup():
packages=find_packages(),
install_requires=['numpy>=1.5.0', 'scipy>=0.7.2'],
package_data={
'': ['*.txt', '*.rst', '*.cu', '*.cuh', '*.c', '*.sh',
'': ['*.txt', '*.rst', '*.cu', '*.cuh', '*.c', '*.sh', '*.pkl',
'ChangeLog'],
'theano.misc': ['*.sh']
},
......
......@@ -2547,7 +2547,28 @@ class Alloc(gof.Op):
x = inputs[0]
gz = grads[0]
n_axes_to_sum = gz.ndim - x.ndim
gx = gz.sum(axis=range(n_axes_to_sum))
#The number of dimensions added
axis = range(n_axes_to_sum)
#The broadcasted dimensions
axis_broadcasted = []
for i, (ib, gb) in enumerate(
zip(inputs[0].broadcastable,
#We need the dimensions corresponding to x
grads[0].broadcastable[-inputs[0].ndim:])):
if ib and not gb:
axis_broadcasted.append(i + n_axes_to_sum)
gx = gz.sum(axis=axis + axis_broadcasted)
if axis_broadcasted:
new_order = list(x.broadcastable)
idx = 0
for i in range(x.ndim):
if not new_order[i]:
new_order[i] = idx
idx += 1
else:
new_order[i] = 'x'
gx = gx.dimshuffle(new_order)
#Dimshuffle to add back the broadcasted dims
#The *elements* of the output are not connected to
#the inputs that specify the shape. If you grow the
#shape by epsilon, the existing elements do not
......
......@@ -1787,10 +1787,16 @@ AllocTester = makeBroadcastTester(
correct01_bcast=(rand(1), numpy.int32(7)),
correct02=(rand(), numpy.int32(4), numpy.int32(7)),
correct12=(rand(7), numpy.int32(4), numpy.int32(7)),
correct13=(rand(7), numpy.int32(2), numpy.int32(
4), numpy.int32(7)),
correct23=(rand(4, 7), numpy.int32(2), numpy.
int32(4), numpy.int32(7)),
correct13=(rand(7), numpy.int32(2), numpy.int32(4),
numpy.int32(7)),
correct23=(rand(4, 7), numpy.int32(2), numpy.int32(4),
numpy.int32(7)),
correctb1=(rand(1, 7), numpy.int32(4), numpy.int32(7)),
correctb2=(rand(1, 7), numpy.int32(2),
numpy.int32(4), numpy.int32(7)),
correctb3=(rand(7, 1), numpy.int32(7), numpy.int32(4)),
correctb4=(rand(7, 1), numpy.int32(2),
numpy.int32(7), numpy.int32(4)),
),
bad_runtime=dict(
bad_shape12=(rand(7), numpy.int32(7), numpy.int32(5)),
......@@ -1839,6 +1845,54 @@ Alloc13GradTester = makeBroadcastTester(
),
)
# Gradient of alloc when a broadcastable row (1, s2) is grown to an
# (s1, s2) matrix: the grad must sum over the unbroadcast dimension.
Allocb1GradTester = makeBroadcastTester(
    name='Allocb1GradTester',
    op=lambda x: alloc(x, s1, s2),
    # Broadcasting x against a zero matrix reproduces what alloc builds.
    expected=(lambda x: x + numpy.zeros((s1, s2), dtype=x.dtype)),
    # Three independent random row inputs, keyed x1..x3.
    grad=dict(('x%d' % i, (rand(1, s2),)) for i in (1, 2, 3)),
)
# Gradient of alloc when a broadcastable row (1, s3) is grown to an
# (s1, s2, s3) tensor3: the grad must sum over the unbroadcast dimensions.
Allocb2GradTester = makeBroadcastTester(
    name='Allocb2GradTester',
    op=lambda x: alloc(x, s1, s2, s3),
    # Broadcasting x against a zero tensor reproduces what alloc builds.
    expected=(lambda x: x + numpy.zeros((s1, s2, s3), dtype=x.dtype)),
    # Three independent random row inputs, keyed x1..x3.
    grad=dict(('x%d' % i, (rand(1, s3),)) for i in (1, 2, 3)),
)
# Gradient of alloc when a broadcastable column (s1, 1) is grown to an
# (s1, s2) matrix: the grad must sum over the unbroadcast dimension.
Allocb3GradTester = makeBroadcastTester(
    name='Allocb3GradTester',
    op=lambda x: alloc(x, s1, s2),
    # Broadcasting x against a zero matrix reproduces what alloc builds.
    expected=(lambda x: x + numpy.zeros((s1, s2), dtype=x.dtype)),
    # Three independent random column inputs, keyed x1..x3.
    grad=dict(('x%d' % i, (rand(s1, 1),)) for i in (1, 2, 3)),
)
# Gradient of alloc when a broadcastable column (s2, 1) is grown to an
# (s1, s2, s3) tensor3: the grad must sum over the unbroadcast dimensions.
Allocb4GradTester = makeBroadcastTester(
    name='Allocb4GradTester',
    op=lambda x: alloc(x, s1, s2, s3),
    # Broadcasting x against a zero tensor reproduces what alloc builds.
    expected=(lambda x: x + numpy.zeros((s1, s2, s3), dtype=x.dtype)),
    # Three independent random column inputs, keyed x1..x3.
    grad=dict(('x%d' % i, (rand(s2, 1),)) for i in (1, 2, 3)),
)
class TestAlloc(unittest.TestCase):
dtype = config.floatX
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论