Merge pull request #6431 from shawntan/issue-5633

Generalised AllocDiag for more than 2D input

Merge pull request #6431 from shawntan/issue-5633
42f0bbf3 · Frédéric Bastien · GitHub · 5a03ac02 · 1105c2bd · 42f0bbf3
--- a/theano/gpuarray/subtensor.py
+++ b/theano/gpuarray/subtensor.py
@@ -9,6 +9,7 @@ from theano.gof import ParamsType
 from theano.gradient import grad_not_implemented
 import theano.tensor as T
 from theano.tensor.subtensor import IncSubtensor, Subtensor, get_idx_list
+from theano.tensor import AllocDiag
 from theano.scalar import bool as bool_t, int32 as int_t, uint32 as size_t
 try:
@@ -1356,40 +1357,61 @@ class GpuExtractDiag(Op):
        return [tuple(out_shape)]
-class GpuAllocDiag(Op):
+class GpuAllocDiag(AllocDiag):
-    __props__ = ("offset",)
+    __props__ = ("offset", "axis1", "axis2")
-    def __init__(self, offset=0):
+    def make_node(self, diag):
-        self.offset = offset
+        ctx_name = infer_context_name(diag)
+        diag = as_gpuarray_variable(diag, ctx_name)
-    def make_node(self, _x):
+        if diag.type.ndim < 1:
-        ctx_name = infer_context_name(_x)
+            raise ValueError('AllocDiag needs an input with 1 or more '
-        x = as_gpuarray_variable(_x, ctx_name)
+                             'dimensions', diag.type)
+        return gof.Apply(
-        if x.ndim != 1:
+            self, [diag],
-            raise ValueError('AllocDiag argument must be a vector!', x)
+            [diag.type.__class__(
+                dtype=diag.dtype,
-        return gof.Apply(self, [x], [x.type.clone(broadcastable=(False, False))()])
+                broadcastable=[False] * (diag.ndim + 1))()]
+        )
    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (z,) = outputs
+        axis1 = np.minimum(self.axis1, self.axis2)
+        axis2 = np.maximum(self.axis1, self.axis2)
+        offset = self.offset
-        dim = x.shape[0] + abs(self.offset)
+        # Initialise a buffer the same size as the output
-        z[0] = gpuarray.zeros((dim, dim), dtype=x.dtype, context=x.context)
+        result_shape = x.shape[:-1] + (x.shape[-1] + abs(offset),) * 2
+        result_buffer_shape = ((np.prod(x.shape[:-1]).astype(np.int64),) +
-        if self.offset <= 0:  # diag in the lower triangle
+                               (x.shape[-1] + abs(offset),) * 2)
-            diag_z = z[0][-self.offset, :(dim + self.offset)]
+        result_buffer = gpuarray.zeros(result_buffer_shape,
+                                       dtype=x.dtype,
+                                       context=x.context)
+        # Slice out a view of the diagonals
+        if offset < 0:  # diag in the lower triangle
+            diag_view = result_buffer[:, abs(offset):, 0]
        else:  # diag in the upper triangle
-            diag_z = z[0][:(dim - self.offset), self.offset]
+            diag_view = result_buffer[:, :x.shape[-1], abs(offset)]
-        diag_z.strides = (sum(z[0].strides),)
+        diag_view.strides = (diag_view.strides[0],
+                             diag_view.strides[1] + x.dtype.itemsize)
+        # Fill view with flattened array of diagonals
+        diag_view[:] = x.reshape(diag_view.shape)[:]
+        # Unflatten buffer into output size
+        result = result_buffer.reshape(result_shape)
-        diag_z[:] = x[:]
+        if len(x.shape) > 1:
+            # Re-order axes so they correspond to diagonals at axis1, axis2
+            axes = list(range(len(x.shape[:-1])))
+            last_idx = axes[-1]
+            axes = axes[:axis1] + [last_idx + 1] + axes[axis1:]
+            axes = axes[:axis2] + [last_idx + 2] + axes[axis2:]
+            result = result.transpose(axes)
+        z[0] = result
    def grad(self, inputs, gout):
        (gz,) = gout
-        return [GpuExtractDiag(offset=self.offset, axis1=0, axis2=1)(gz)]
+        return [GpuExtractDiag(offset=self.offset, axis1=self.axis1, axis2=self.axis2)(gz)]
-    def infer_shape(self, node, shapes):
-        dim = shapes[0][0] + abs(self.offset)
-        return [[dim, dim]]
--- a/theano/gpuarray/tests/test_subtensor.py
+++ b/theano/gpuarray/tests/test_subtensor.py
@@ -5,7 +5,7 @@ import unittest
 import theano
 from theano import tensor
 from theano.compile import DeepCopyOp
-from theano.tensor.tests import test_subtensor
+from theano.tensor.tests import test_subtensor, test_basic
 from theano.tests import unittest_tools as utt
 from ..basic_ops import HostFromGpu, GpuFromHost, GpuContiguous
@@ -318,6 +318,15 @@ class test_gpuextractdiag(unittest.TestCase):
                np_x.diagonal(offset, axis1, axis2))
+class TestGpuAllocDiag(test_basic.TestAllocDiag):
+    def __init__(self, name):
+        return test_basic.TestAllocDiag.__init__(
+            self, name,
+            alloc_diag=GpuAllocDiag,
+            mode=mode_with_gpu
+        )
 class test_gpuallocdiag(unittest.TestCase):
    def test_allocdiag_opt(self):
        x = tensor.vector()

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -6518,8 +6518,18 @@ class AllocDiag(Op):
    Parameters
    ----------
-    offset : int
+    axis1: Axis to be used as the first axis of the 2-D
-        Indicates which diagonal to put `x` into. Defaults to `0`.
+        sub-arrays to which the diagonals will be allocated.
+        Defaults to first axis (0).
+    axis2: Axis to be used as the second axis of the 2-D
+        sub-arrays to which the diagonals will be allocated.
+        Defaults to second axis (1).
+    offset: Offset of the diagonal from the main diagonal defined by `axis1`
+        and `axis2`.
+        Can be positive or negative.
+        Defaults to main diagonal (0).
    x: symbolic vector
        A tensor vector consists of diagonal values.
@@ -6527,34 +6537,92 @@ class AllocDiag(Op):
    Returns
    -------
    tensor : symbolic tenstor
-        A tensor with passed vector values at its corresponding diagonal.
+        A tensor with passed tensor values at their corresponding diagonals.
    """
-    __props__ = ("offset", )
+    __props__ = ("offset", "axis1", "axis2")
-    default_offset = 0
-    def __init__(self, offset=0):
+    def __init__(self, offset=0, axis1=0, axis2=1):
-        if numpy_diagonal_return_view:
-            self.view_map = {0: [0]}
        self.offset = offset
+        self.axis1 = axis1
+        self.axis2 = axis2
    def make_node(self, diag):
        diag = as_tensor_variable(diag)
-        if diag.type.ndim != 1:
+        if diag.type.ndim < 1:
-            raise TypeError('data argument must be a vector', diag.type)
+            raise ValueError('AllocDiag needs an input with 1 or more '
-        return Apply(self, [diag], [matrix(dtype=diag.dtype)])
+                             'dimensions', diag.type)
+        return Apply(
+            self, [diag],
+            [diag.type.__class__(
+                dtype=diag.dtype,
+                broadcastable=[False] * (diag.ndim + 1))()]
+        )
    def perform(self, node, inputs, outputs):
+        (x,) = inputs
        (z,) = outputs
-        z[0] = np.diag(inputs[0], self.offset)
+        axis1 = np.minimum(self.axis1, self.axis2)
+        axis2 = np.maximum(self.axis1, self.axis2)
+        offset = self.offset
+        # Create array with one extra dimension for resulting matrix
+        result_shape = x.shape[:-1] + (x.shape[-1] + abs(offset),) * 2
+        result = np.zeros(result_shape, dtype=x.dtype)
+        # Create slice for diagonal in final 2 axes
+        idxs = np.arange(x.shape[-1])
+        diagonal_slice = ((len(result_shape) - 2) * [slice(None)] +
+                          [idxs + np.maximum(0, -offset),
+                           idxs + np.maximum(0, offset)])
+        # Fill in final 2 axes with x
+        result[diagonal_slice] = x
+        if len(x.shape) > 1:
+            # Re-order axes so they correspond to diagonals at axis1, axis2
+            axes = list(range(len(x.shape[:-1])))
+            last_idx = axes[-1]
+            axes = axes[:axis1] + [last_idx + 1] + axes[axis1:]
+            axes = axes[:axis2] + [last_idx + 2] + axes[axis2:]
+            result = result.transpose(axes)
+        z[0] = result
    def grad(self, inputs, gout):
        (gz,) = gout
-        return [diagonal(gz, offset=self.offset, axis1=0, axis2=1)]
+        return [diagonal(
+            gz,
+            offset=self.offset,
+            axis1=self.axis1,
+            axis2=self.axis2
+        )]
    def infer_shape(self, nodes, shapes):
-        return [(shapes[0][0],) * 2]
+        (x_shape,) = shapes
+        axis1 = np.minimum(self.axis1, self.axis2)
+        axis2 = np.maximum(self.axis1, self.axis2)
+        result_shape = list(x_shape[:-1])
+        diag_shape = x_shape[-1] + abs(self.offset)
+        result_shape = result_shape[:axis1] + [diag_shape] + result_shape[axis1:]
+        result_shape = result_shape[:axis2] + [diag_shape] + result_shape[axis2:]
+        return [tuple(result_shape)]
+    def __setstate__(self, state):
+        if "view_map" in state:
+            del state["view_map"]
+        self.__dict__.update(state)
+        if "offset" not in state:
+            self.offset = 0
+        if "axis1" not in state:
+            self.axis1 = 0
+        if "axis2" not in state:
+            self.axis2 = 1
 def diag(v, k=0):

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -7561,6 +7561,88 @@ class test_diag(unittest.TestCase):
        tensor.verify_grad(diag, [x], rng=rng)
+class TestAllocDiag(unittest.TestCase):
+    def __init__(self, name, alloc_diag=AllocDiag, mode=None):
+        self.alloc_diag = alloc_diag
+        if mode is None:
+            mode = theano.compile.mode.get_default_mode()
+        self.mode = mode
+        return super(TestAllocDiag, self).__init__(name)
+    def _generator(self):
+        dims = 4
+        shape = (5,) * dims
+        xv = np.random.randn(*shape).astype(config.floatX)
+        for d in xrange(1, dims + 1):
+            # Create a TensorType of the same dimensions as
+            # as the data we want to test.
+            x = TensorType(dtype=config.floatX, broadcastable=(False,) * d)('x')
+            # Make a slice of the test data that has the
+            # dimensions we need by doing xv[0,...,0]
+            # For example, for an array of shape (5,), we
+            # need to do xv[0, 0, 0, 0].
+            test_val = xv[((0,) * (dims - d))]
+            yield x, test_val
+    def test_alloc_diag_values(self):
+        for x, test_val in self._generator():
+            for offset, axis1, axis2 in [(0, 0, 1), (0, 1, 2), (1, 0, 1),
+                                         (0, 1, 3), (0, 2, 3), (1, 2, 3),
+                                         (-1, 0, 1), (-2, 0, 1), (-1, 1, 2)]:
+                # Test AllocDiag values
+                if np.maximum(axis1, axis2) > len(test_val.shape):
+                    continue
+                adiag_op = self.alloc_diag(offset=offset,
+                                           axis1=axis1,
+                                           axis2=axis2)
+                f = theano.function([x], adiag_op(x))
+                # AllocDiag and extract the diagonal again
+                # to check
+                diag_arr = f(test_val)
+                rediag = np.diagonal(
+                    diag_arr,
+                    offset=offset,
+                    axis1=axis1,
+                    axis2=axis2
+                )
+                assert np.all(rediag == test_val)
+                # Test infer_shape
+                f_shape = theano.function([x], adiag_op(x).shape, mode='FAST_RUN')
+                theano.printing.debugprint(f_shape.maker.fgraph.outputs[0])
+                output_shape = f_shape(test_val)
+                assert not any(isinstance(node.op, self.alloc_diag)
+                               for node in f_shape.maker.fgraph.toposort())
+                rediag_shape = np.diagonal(
+                    np.ones(output_shape),
+                    offset=offset,
+                    axis1=axis1,
+                    axis2=axis2
+                ).shape
+                assert np.all(rediag_shape == test_val.shape)
+                diag_x = adiag_op(x)
+                sum_diag_x = tensor.sum(diag_x)
+                grad_x = tensor.grad(sum_diag_x, x)
+                grad_diag_x = tensor.grad(sum_diag_x, diag_x)
+                f_grad_x = theano.function([x], grad_x, mode=self.mode)
+                f_grad_diag_x = theano.function([x], grad_diag_x, mode=self.mode)
+                grad_input = f_grad_x(test_val)
+                grad_diag_input = f_grad_diag_x(test_val)
+                true_grad_input = np.diagonal(
+                    grad_diag_input,
+                    offset=offset,
+                    axis1=axis1,
+                    axis2=axis2
+                )
+                assert np.all(true_grad_input == grad_input)
 class test_numpy_assumptions(unittest.TestCase):
    # Verify that some assumptions Theano makes on Numpy's behavior still hold.
    def test_ndarray_copy(self):