Commit 3c5d0e22 authored by Frederic

Work around a numpy.fill_diagonal bug with tall matrices.

Parent: d268d4b9
...@@ -425,6 +425,10 @@ class FillDiagonal(gof.Op): ...@@ -425,6 +425,10 @@ class FillDiagonal(gof.Op):
An array identical to 'a' except that its main diagonal is filled with An array identical to 'a' except that its main diagonal is filled with
scalar 'val'. (For an array 'a' with a.ndim >= 2, the main diagonal is the scalar 'val'. (For an array 'a' with a.ndim >= 2, the main diagonal is the
list of locations a[i, i, ..., i] (i.e. with indices all identical).) list of locations a[i, i, ..., i] (i.e. with indices all identical).)
    Supports rectangular matrices, and tensors with more than 2 dimensions
    provided all of their dimensions are equal.
""" """
def __eq__(self, other): def __eq__(self, other):
...@@ -457,7 +461,17 @@ class FillDiagonal(gof.Op): ...@@ -457,7 +461,17 @@ class FillDiagonal(gof.Op):
def perform(self, node, inputs, output_storage): def perform(self, node, inputs, output_storage):
a = inputs[0].copy() a = inputs[0].copy()
val = inputs[1] val = inputs[1]
numpy.fill_diagonal(a, val) if a.ndim == 2:
# numpy.fill_diagonal up to date(including 1.6.2) have a
# bug for tall matrix.
# For 2-d arrays, we accept rectangular ones.
step = a.shape[1] + 1
end = a.shape[1] * a.shape[1]
# Write the value out into the diagonal.
a.flat[:end:step] = val
else:
numpy.fill_diagonal(a, val)
output_storage[0][0] = a output_storage[0][0] = a
def grad(self, inp, cost_grad): def grad(self, inp, cost_grad):
......
...@@ -235,28 +235,46 @@ class TestFillDiagonal(utt.InferShapeTester): ...@@ -235,28 +235,46 @@ class TestFillDiagonal(utt.InferShapeTester):
def test_perform(self):
    """Check fill_diagonal on square, wide, tall and 3-d inputs.

    We cannot compare against numpy.fill_diagonal because it is bugged
    for tall matrices (the diagonal wraps around), so the diagonal is
    checked directly instead.
    """
    x = tensor.dmatrix()
    y = tensor.dscalar()
    f = function([x, y], fill_diagonal(x, y))
    for shp in [(8, 8), (5, 8), (8, 5)]:
        a = numpy.random.rand(*shp)
        val = numpy.random.rand()
        out = f(a, val)
        # Every diagonal entry equals val, and nothing else was written.
        assert numpy.allclose(numpy.diag(out), val)
        assert (out == val).sum() == min(a.shape)

    # Test for a 3-d tensor: the main diagonal is a[i, i, i].
    a = numpy.random.rand(3, 3, 3)
    x = tensor.dtensor3()
    y = tensor.dscalar()
    f = function([x, y], fill_diagonal(x, y))
    # +10 guarantees val cannot collide with the rand() entries of a.
    val = numpy.random.rand() + 10
    out = f(a, val)
    assert out[0, 0, 0] == val
    assert out[1, 1, 1] == val
    assert out[2, 2, 2] == val
    assert (out == val).sum() == min(a.shape)
def test_gradient(self):
    """Verify the gradient of fill_diagonal on wide and tall matrices."""
    utt.verify_grad(fill_diagonal, [numpy.random.rand(5, 8),
                                    numpy.random.rand()],
                    n_tests=1, rng=TestFillDiagonal.rng)
    # TODO: check why the gradient wrt val does not match when a has
    # more rows than cols: might be a problem with the testing procedure.
    utt.verify_grad(fill_diagonal, [numpy.random.rand(8, 5),
                                    numpy.random.rand()],
                    n_tests=1, rng=TestFillDiagonal.rng)
def test_infer_shape(self):
    """Check the inferred output shape for 2-d and 3-d inputs."""
    z = tensor.dtensor3()
    x = tensor.dmatrix()
    y = tensor.dscalar()
    # Rectangular 2-d case.
    self._compile_and_check([x, y], [self.op(x, y)],
                            [numpy.random.rand(8, 5),
                             numpy.random.rand()],
                            self.op_class)
    # 3-d case with all dimensions equal, as required by the op.
    self._compile_and_check([z, y], [self.op(z, y)],
                            [numpy.random.rand(8, 8, 8),
                             numpy.random.rand()],
                            self.op_class)
if __name__ == "__main__": if __name__ == "__main__":
...@@ -265,4 +283,3 @@ if __name__ == "__main__": ...@@ -265,4 +283,3 @@ if __name__ == "__main__":
t.test_perform() t.test_perform()
t.test_gradient() t.test_gradient()
t.test_infer_shape() t.test_infer_shape()
Markdown format supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment