Merge pull request #1909 from ChienliMa/master

Add fill_diagonal_offset()

Merge pull request #1909 from ChienliMa/master
de8c4a4c · Frédéric Bastien · 8e7e03d1 · 9f72dfaa · de8c4a4c · de8c4a4c
--- a/theano/tensor/__init__.py
+++ b/theano/tensor/__init__.py
@@ -62,4 +62,5 @@ from theano.gradient import Rop, Lop, grad, numeric_grad, verify_grad, \
 from theano.tensor.sort import sort, argsort
 from theano.tensor.extra_ops import (DiffOp, bincount, squeeze,
-                       repeat, bartlett, fill_diagonal, cumsum, cumprod)
+                       repeat, bartlett, fill_diagonal, fill_diagonal_offset,
+                       cumsum, cumprod)
--- a/theano/tensor/extra_ops.py
+++ b/theano/tensor/extra_ops.py
@@ -2,12 +2,15 @@ import numpy as np
 import numpy
 import theano
 from theano.tensor import basic
 from theano import gof, scalar
 tensor = basic
 from theano.gradient import DisconnectedType
 class CumsumOp(theano.Op):
    # See function cumsum for docstring
    def __init__(self, axis=None):
@@ -664,8 +667,8 @@ class FillDiagonal(gof.Op):
                            % self.__class__.__name__)
        val = tensor.cast(val, dtype=scalar.upcast(a.dtype, val.dtype))
        if val.dtype != a.dtype:
-            raise TypeError('%s: type of second parameter must be compatible'
+            raise TypeError('%s: type of second parameter must be the same as'
-                          ' with first\'s' % self.__class__.__name__)
+                          ' the first\'s' % self.__class__.__name__)
        return gof.Apply(self, [a, val], [a.type()])
    def perform(self, node, inputs, output_storage):
@@ -725,3 +728,132 @@ def fill_diagonal(a, val):
    .. versionadded:: 0.6
    """
    return fill_diagonal_(a, val)
+class FillDiagonalOffset(gof.Op):
+    # See function fill_diagonal_offset for docstring
+    def __eq__(self, other):
+        # The op carries no parameters, so two instances are equal
+        # whenever they are of the same class.
+        return type(self) == type(other)
+    def __hash__(self):
+        # Must stay consistent with __eq__ above.
+        return hash(type(self))
+    def __str__(self):
+        return self.__class__.__name__
+    def infer_shape(self, node, in_shapes):
+        # The output has the same shape as the first input 'a'.
+        return [in_shapes[0]]
+    def make_node(self, a, val, offset):
+        """
+        Build the Apply node for (a, val, offset).
+
+        :param a: matrix (exactly two dimensions) to copy and fill.
+        :param val: scalar fill value; after upcasting with the dtype
+            of 'a' it must have the dtype of 'a'.
+        :param offset: scalar whose dtype name starts with 'int'.
+        :raises TypeError: if any of the checks above fails.
+        """
+        a = tensor.as_tensor_variable(a)
+        val = tensor.as_tensor_variable(val)
+        offset = tensor.as_tensor_variable(offset)
+        if a.ndim != 2:
+            raise TypeError('%s: first parameter must have exactly'
+                            ' two dimensions' % self.__class__.__name__)
+        elif val.ndim != 0:
+            raise TypeError('%s: second parameter must be a scalar'
+                            % self.__class__.__name__)
+        elif offset.ndim != 0:
+            raise TypeError('%s: third parameter must be a scalar'
+                            % self.__class__.__name__)
+        val = tensor.cast(val, dtype=scalar.upcast(a.dtype, val.dtype))
+        if val.dtype != a.dtype:
+            raise TypeError('%s: type of second parameter must be the same'
+                            ' as the first\'s' % self.__class__.__name__)
+        elif offset.dtype[:3] != 'int':
+            raise TypeError('%s: type of third parameter must be an integer'
+                            ' use theano.tensor.cast( input, \'int32/int64\')'
+                            % self.__class__.__name__)
+        return gof.Apply(self, [a, val, offset], [a.type()])
+    def perform(self, node, inputs, output_storage):
+        """
+        Copy inputs[0] and write inputs[1] along the diagonal selected
+        by inputs[2]; the copy is stored in output_storage[0][0].
+        """
+        a = inputs[0].copy()
+        val = inputs[1]
+        # Use a plain Python int so the index arithmetic below is not
+        # carried out in a fixed-width numpy integer type.
+        offset = int(inputs[2])
+        height, width = a.shape
+        # Note: fill_diagonal only supports rectangular matrices. Its
+        # output for a tall matrix is "wrapped", which is an option in
+        # numpy 1.9.0 but was regarded as a bug in numpy 1.6.2. Here
+        # fill_diagonal_offset is implemented with unwrapped output, so
+        # it supports tall matrices. (The outputs of fill_diagonal and
+        # fill_diagonal_offset therefore differ only for tall matrices.)
+        if offset >= 0:
+            # Diagonal starts in the first row, 'offset' columns in.
+            start = offset
+            num_of_step = min(min(width, height), width - offset)
+        else:
+            # Diagonal starts in the first column, '-offset' rows down.
+            start = -offset * width
+            num_of_step = min(min(width, height), height + offset)
+        # An offset that lies outside the matrix selects no element.
+        # Without this clamp 'end' could become negative, and a negative
+        # slice bound is interpreted relative to the end of the buffer,
+        # which would overwrite unrelated entries.
+        num_of_step = max(num_of_step, 0)
+        # Moving one row down and one column right in the flat view.
+        step = width + 1
+        end = start + step * num_of_step
+        # Write the value out into the diagonal.
+        a.flat[start:end:step] = val
+        output_storage[0][0] = a
+    def grad(self, inp, cost_grad):
+        """
+        Return the gradients with respect to (a, val, offset).
+
+        Note: The gradient is currently implemented for matrices
+        only.
+
+        The gradient for 'a' is the incoming gradient with the selected
+        diagonal zeroed, the gradient for 'val' is the sum of the
+        incoming gradient along that diagonal, and the gradient for
+        'offset' is undefined.
+        """
+        a, val, offset = inp
+        grad = cost_grad[0]
+        height, width = grad.shape
+        if (a.dtype.startswith('complex')):
+            # One entry per input (a, val, offset).
+            return [None, None, None]
+        # only valid for matrices
+        wr_a = fill_diagonal_offset(grad, 0, offset)
+        offset_abs = basic.abs_(offset)
+        pos_offset_flag = basic.ge(offset, 0)
+        neg_offset_flag = basic.lt(offset, 0)
+        min_wh = basic.minimum(width, height)
+        # Symbolic equivalent of the if/else on the sign of 'offset'
+        # used in perform(): the flags select the matching branch.
+        start = offset * pos_offset_flag + offset_abs * width \
+                 * neg_offset_flag
+        num_of_step = basic.minimum(min_wh, width * pos_offset_flag
+                    + height * neg_offset_flag - offset_abs)
+        # Same clamp as in perform(): an out-of-range offset must select
+        # an empty slice instead of a negative, wrapping 'end'.
+        num_of_step = basic.maximum(num_of_step, 0)
+        step = width + 1
+        end = start + step * num_of_step
+        # input of slice should be integer
+        start = basic.cast(start, 'int32')
+        step = basic.cast(step, 'int32')
+        end = basic.cast(end, 'int32')
+        wr_val = grad.flatten()[start:end:step].sum()
+        wr_offset = theano.gradient.grad_undefined(
+            self, 2, offset,
+            "offset is not defined for non-integer offset so"
+            " fill_diagonal_offset(a,val,offset+eps) is undefined")
+        return [wr_a, wr_val, wr_offset]
+fill_diagonal_offset = FillDiagonalOffset()
+""" Returns a copy of an array with all
+    elements of the offset diagonal set to a specified scalar value.
+    :param a: Rectangular array of two dimensions.
+    :param val: Scalar value to fill the diagonal. Its type must be
+        compatible with that of array 'a' (i.e. upcasting the dtypes
+        of 'a' and 'val' must give the dtype of 'a').
+    :param offset: Integer scalar giving the offset of the diagonal to
+        fill from the main diagonal. A positive offset selects a
+        diagonal above the main one, a negative offset one below it.
+    :return: An array identical to 'a' except that its offset diagonal
+        is filled with scalar 'val'. The output is unwrapped.
+"""
--- a/theano/tensor/tests/test_extra_ops.py
+++ b/theano/tensor/tests/test_extra_ops.py
@@ -8,7 +8,8 @@ from theano.tests import unittest_tools as utt
 from theano.tensor.extra_ops import (CumsumOp, cumsum, CumprodOp, cumprod,
                                     BinCountOp, bincount, DiffOp, diff,
                                     squeeze, RepeatOp, repeat, Bartlett, bartlett,
-                                     FillDiagonal, fill_diagonal)
+                                     FillDiagonal, fill_diagonal, FillDiagonalOffset,
+                                     fill_diagonal_offset)
 from theano import tensor as T
 from theano import config, tensor, function
@@ -464,3 +465,65 @@ class TestFillDiagonal(utt.InferShapeTester):
                                 numpy.random.rand()],
                                self.op_class,
                                warn=False)
+class TestFillDiagonalOffset(utt.InferShapeTester):
+    # Exercises perform, gradient and shape inference of
+    # FillDiagonalOffset over square, wide and tall matrices.
+    rng = numpy.random.RandomState(43)
+    def setUp(self):
+        super(TestFillDiagonalOffset, self).setUp()
+        self.op_class = FillDiagonalOffset
+        self.op = fill_diagonal_offset
+    def test_perform(self):
+        mat = tensor.matrix()
+        fill = tensor.scalar()
+        off = tensor.iscalar()
+        f = function([mat, fill, off], fill_diagonal_offset(mat, fill, off))
+        for test_offset in (-5, -4, -1, 0, 1, 4, 5):
+            for shp in [(8, 8), (5, 8), (8, 5), (5, 5)]:
+                a = numpy.random.rand(*shp).astype(config.floatX)
+                val = numpy.cast[config.floatX](numpy.random.rand())
+                out = f(a, val, test_offset)
+                # We can't use numpy.fill_diagonal as it is bugged.
+                assert numpy.allclose(numpy.diag(out, test_offset), val)
+                # Number of rows/columns left once the offset is applied;
+                # it bounds the length of the filled diagonal.
+                if test_offset >= 0:
+                    remaining = a.shape[1] - test_offset
+                else:
+                    remaining = a.shape[0] + test_offset
+                assert (out == val).sum() == min(min(a.shape), remaining)
+    def test_gradient(self):
+        for test_offset in (-5, -4, -1, 0, 1, 4, 5):
+            # input 'offset' will not be tested
+            def fill_diagonal_with_fix_offset(a, val):
+                return fill_diagonal_offset(a, val, test_offset)
+            # Wide, tall and square inputs, in that order.
+            for shp in [(5, 8), (8, 5), (5, 5)]:
+                point = [numpy.random.rand(*shp), numpy.random.rand()]
+                utt.verify_grad(fill_diagonal_with_fix_offset, point,
+                                n_tests=1, rng=TestFillDiagonalOffset.rng)
+    def test_infer_shape(self):
+        x = tensor.dmatrix()
+        y = tensor.dscalar()
+        z = tensor.iscalar()
+        for test_offset in (-5, -4, -1, 0, 1, 4, 5):
+            # Tall first, then wide.
+            for shp in [(8, 5), (5, 8)]:
+                values = [numpy.random.rand(*shp),
+                          numpy.random.rand(),
+                          test_offset]
+                self._compile_and_check([x, y, z], [self.op(x, y, z)],
+                                        values, self.op_class)
--- a/theano/tensor/tests/test_sort.py
+++ b/theano/tensor/tests/test_sort.py
@@ -143,3 +143,5 @@ def test_argsort():
    gv = f(m_val)
    gt = np.argsort(m_val, None)
    assert np.allclose(gv, gt)