Merge pull request #5247 from hantek/extractdiag

Moving ExtractDiag op to the new backend

Merge pull request #5247 from hantek/extractdiag
de2fba59 · abergeron · GitHub · c229fa84 · cb6d25b0 · de2fba59
--- a/theano/gpuarray/fft.py
+++ b/theano/gpuarray/fft.py
@@ -6,7 +6,9 @@ from theano import Op
 import theano.tensor as T
 from theano.gradient import DisconnectedType

-from theano.gpuarray import (basic_ops, GpuArrayType)
+from .basic_ops import (gpu_contiguous, as_gpuarray_variable,
+                        infer_context_name)
+from .type import GpuArrayType

 import theano.tensor.fft
 from .opt import register_opt, op_lifter, register_opt2
@@ -58,9 +60,8 @@ class CuRFFTOp(Op):
        if not pycuda_available:
            raise RuntimeError("pycuda is needed for CuFFTOp")

-        inp = basic_ops.gpu_contiguous(
-            basic_ops.as_gpuarray_variable(inp,
-                                           basic_ops.infer_context_name(inp)))
+        inp = gpu_contiguous(as_gpuarray_variable(inp,
+                                                  infer_context_name(inp)))

        # If no shape is provided as input, default to input data shape.
        if s is None:
@@ -183,9 +184,8 @@ class CuIRFFTOp(Op):
        if not pycuda_available:
            raise RuntimeError("pycuda is needed for CuIFFTOp")

-        inp = basic_ops.gpu_contiguous(
-            basic_ops.as_gpuarray_variable(inp,
-                                           basic_ops.infer_context_name(inp)))
+        inp = gpu_contiguous(as_gpuarray_variable(inp,
+                                                  infer_context_name(inp)))

        # If no shape is provided as input, calculate shape assuming even real transform.
        if s is None:

--- a/theano/gpuarray/subtensor.py
+++ b/theano/gpuarray/subtensor.py
@@ -7,6 +7,8 @@ from six import integer_types
 from six.moves import StringIO

 from theano import tensor, gof, Op
+from theano.gradient import grad_not_implemented
+import theano.tensor as T
 from theano.tensor.subtensor import IncSubtensor, Subtensor, get_idx_list

 try:
@@ -1076,3 +1078,108 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) {
          return 0;
        }
        """ % locals()
+
+
+class GpuDiagonal(Subtensor):
+    __props__ = ("offset", "axis1", "axis2", "view")
+
+    def __init__(self, offset=0, axis1=0, axis2=1, view=False):
+        self.view = view
+        if self.view:
+            self.view_map = {0: [0]}
+        self.offset = offset
+        self.axis1 = axis1
+        self.axis2 = axis2
+
+    def make_node(self, _x):
+        ctx_name = infer_context_name(_x)
+        x = as_gpuarray_variable(_x, ctx_name)
+
+        if x.ndim < 2:
+            raise ValueError('Diagonal needs an input with 2 or more '
+                             'dimensions', x)
+        axis_small, axis_large = sorted((self.axis1, self.axis2))
+        broadcastable = x.broadcastable[:axis_small] + \
+            x.broadcastable[axis_small + 1:axis_large] + \
+            x.broadcastable[axis_large + 1:] + (False,)
+        return gof.Apply(self, [x], [x.type.__class__(
+            dtype=x.dtype,
+            broadcastable=broadcastable)()])
+
+    def perform(self, node, inputs, outputs):
+        (x,) = inputs
+        (z,) = outputs
+        # zero-dimensional matrices ...
+        if x.size == 0:
+            out_shape = [d for i, d in enumerate(x.shape)
+                         if i not in (self.axis1, self.axis2)]
+            diag_size = numpy.min((x.shape[self.axis1], x.shape[self.axis2]))
+            out_shape.append(diag_size)
+            z[0] = node.outputs[0].type.value_zeros(tuple(out_shape))
+            return
+
+        # step 1) slicing on axis1 and axis2.
+        if self.offset >= 0:
+            stride_axis, slice_axis = self.axis1, self.axis2
+        else:
+            slice_axis, stride_axis = self.axis1, self.axis2
+
+        small_axis, large_axis = sorted((x.shape[self.axis1],
+                                         x.shape[self.axis2]))
+
+        if x.shape[stride_axis] < x.shape[slice_axis]:
+            # in the bigger triangle
+            numstride = small_axis - numpy.max((
+                0, small_axis + numpy.abs(self.offset) - large_axis))
+        else:
+            # in the smaller triangle
+            numstride = small_axis - numpy.abs(self.offset)
+
+        slicer = [numpy.s_[:], ] * x.ndim
+        slicer[stride_axis] = numpy.s_[:numstride]
+        slicer[slice_axis] = numpy.abs(self.offset)
+        slicer = tuple(slicer)
+
+        # step 2) Swap stride_axis to the last dim because we want the dim on
+        # which the diags extracted be listed as the last dim of the tensor.
+        # This is also in consistence with the interface of numpy.diagonal.
+        if slice_axis < stride_axis:
+            stride_axis -= 1
+        new_dim_order = range(x[slicer].ndim)
+        new_dim_order = tuple(new_dim_order[:stride_axis] +
+                              new_dim_order[stride_axis + 1:] +
+                              [stride_axis, ])
+        rval = x[slicer].transpose(new_dim_order)
+
+        # step 3) modify the strides in the last axis, such that rval becomes
+        # a view on the diagonal.
+        other_strides = tuple([d for i, d in enumerate(x.strides)
+                               if i not in (self.axis1, self.axis2)])
+        rval.strides = other_strides + \
+            (x.strides[self.axis1] + x.strides[self.axis2], )
+
+        if self.view:
+            z[0] = rval
+        else:
+            z[0] = rval.copy()
+
+    def grad(self, inputs, gout):
+        (input_x,) = inputs
+        return [grad_not_implemented(self, 0, input_x)]
+
+    def infer_shape(self, node, shapes):
+        in_shape, = shapes
+        dim1 = in_shape[self.axis1]
+        dim2 = in_shape[self.axis2]
+        out_shape = [d for i, d in enumerate(in_shape)
+                     if i not in (self.axis1, self.axis2)]
+        # The following logic is inspired by C code of PyArray_Diagonal().
+        offset = self.offset
+        if offset > 0:
+            diag_size = T.clip(dim2 - offset, 0, dim1)
+        elif offset < 0:
+            diag_size = T.clip(dim1 + offset, 0, dim2)
+        else:
+            diag_size = T.minimum(dim1, dim2)
+        out_shape.append(diag_size)
+        return [tuple(out_shape)]
--- a/theano/gpuarray/tests/test_subtensor.py
+++ b/theano/gpuarray/tests/test_subtensor.py
 from __future__ import absolute_import, print_function, division
 import numpy
+import unittest

 import theano
 from theano import tensor
@@ -11,7 +12,8 @@ from ..elemwise import GpuDimShuffle
 from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
                         GpuAdvancedSubtensor1,
                         GpuAdvancedSubtensor,
-                         GpuAdvancedIncSubtensor1)
+                         GpuAdvancedIncSubtensor1,
+                         GpuDiagonal)
 from ..type import gpuarray_shared_constructor

 from .config import mode_with_gpu
@@ -126,3 +128,26 @@ def test_adv_subtensor():
    rval = f(idx1_val, idx2_val)
    rep = xval[idx1_val, None, slice(0, 2, 1), idx2_val, None]
    assert numpy.allclose(rval, rep)
+
+
+class test_gpudiagonal(unittest.TestCase):
+    def test_matrix(self):
+        x = tensor.matrix()
+        np_x = numpy.arange(77).reshape(7, 11).astype(theano.config.floatX)
+        fn = theano.function([x], GpuDiagonal()(x), mode=mode_with_gpu)
+        assert numpy.allclose(fn(np_x), np_x.diagonal())
+        fn = theano.function([x], GpuDiagonal(2)(x), mode=mode_with_gpu)
+        assert numpy.allclose(fn(np_x), np_x.diagonal(2))
+        fn = theano.function([x], GpuDiagonal(-3)(x), mode=mode_with_gpu)
+        assert numpy.allclose(fn(np_x), np_x.diagonal(-3))
+
+    def test_tensor(self):
+        x = tensor.tensor4()
+        np_x = numpy.arange(30107).reshape(7, 11, 17, 23).astype(theano.config.floatX)
+        for offset, axis1, axis2 in [
+                (1, 0, 1), (-1, 0, 1), (0, 1, 0), (-2, 1, 0),
+                (-3, 1, 0), (-2, 2, 0), (3, 3, 0), (-1, 3, 2),
+                (2, 2, 3), (-1, 2, 1), (1, 3, 1), (-1, 1, 3)]:
+            assert numpy.allclose(
+                GpuDiagonal(offset, axis1, axis2)(x).eval({x: np_x}),
+                np_x.diagonal(offset, axis1, axis2))
--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -6026,7 +6026,7 @@ numpy_diagonal_return_view = numpy.may_share_memory(numpy.diagonal(x), x)
 del x


-class Diagonal(Op):
+class ExtractDiag(Op):
    """Return specified diagonals.

    Parameters
@@ -6040,10 +6040,18 @@ class Diagonal(Op):
        A vector representing the diagonal elements.

    """
-    __props__ = ("offset", "axis1", "axis2")
+    __props__ = ("offset", "axis1", "axis2", "view")

-    def __init__(self, offset=0, axis1=0, axis2=1):
-        if numpy_diagonal_return_view:
+    def __init__(self, offset=0, axis1=0, axis2=1, view=False):
+        self.view = view
+        if self.view and not numpy_diagonal_return_view:
+            warnings.warn("View will forced to False. ExtractDiag property view is "
+                          "set to True but numpy version %s and prior versions of "
+                          "numpy.diagonal() do not return a view. Update "
+                          "numpy to use ExtractDiag(view=True)" %
+                          numpy.version.version)
+            self.view = False
+        if self.view:
            self.view_map = {0: [0]}
        self.offset = offset
        self.axis1 = axis1
@@ -6051,9 +6059,13 @@ class Diagonal(Op):

    def make_node(self, x):
        x = as_tensor_variable(x)
-        assert x.ndim >= 2
-        return Apply(self, [x], [tensor(dtype=x.dtype,
-                                        broadcastable=[False] * (x.ndim - 1))])
+
+        if x.ndim < 2:
+            raise ValueError('ExtractDiag needs an input with 2 or more '
+                             'dimensions', x)
+        return Apply(self, [x], [x.type.__class__(
+            dtype=x.dtype,
+            broadcastable=[False] * (x.ndim - 1))()])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
@@ -6086,7 +6098,7 @@ class Diagonal(Op):
 def diagonal(a, offset=0, axis1=0, axis2=1):
    if (offset, axis1, axis2) == (0, 0, 1):
        return theano.tensor.nlinalg.extract_diag(a)
-    return Diagonal(offset, axis1, axis2)(a)
+    return ExtractDiag(offset, axis1, axis2)(a)


 class Diag(Op):

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -45,7 +45,7 @@ from theano.tensor import (_shared, wvector, bvector,
        tile, patternbroadcast, Eye, Shape, Dot, PermuteRowElements,
        ScalarFromTensor, TensorFromScalar, dtensor4, Rebroadcast, Alloc,
        dtensor3, SpecifyShape, Mean,
-        itensor3, Tile, switch, Diagonal, Diag,
+        itensor3, Tile, switch, ExtractDiag, Diag,
        nonzero, flatnonzero, nonzero_values,
        stacklists, DimShuffle, hessian, ptp, power,
        swapaxes, choose, Choose, NoneConst, AllocEmpty,
@@ -7427,27 +7427,27 @@ class TestInferShape(utt.InferShapeTester):
                                [Tri()(aiscal, biscal, ciscal)],
                                [3, 5, 0], Tri)

-        # Diagonal
+        # ExtractDiag
        atens3 = tensor3()
        atens3_val = rand(4, 5, 3)
-        atens3_diag = Diagonal()(atens3)
+        atens3_diag = ExtractDiag()(atens3)
        self._compile_and_check([atens3], [atens3_diag],
-                                [atens3_val], Diagonal)
-        atens3_diag = Diagonal(1)(atens3)
+                                [atens3_val], ExtractDiag)
+        atens3_diag = ExtractDiag(1)(atens3)
        self._compile_and_check([atens3], [atens3_diag],
-                                [atens3_val], Diagonal)
-        atens3_diag = Diagonal(-1)(atens3)
+                                [atens3_val], ExtractDiag)
+        atens3_diag = ExtractDiag(-1)(atens3)
        self._compile_and_check([atens3], [atens3_diag],
-                                [atens3_val], Diagonal)
-        atens3_diag = Diagonal(1, 0, 2)(atens3)
+                                [atens3_val], ExtractDiag)
+        atens3_diag = ExtractDiag(1, 0, 2)(atens3)
        self._compile_and_check([atens3], [atens3_diag],
-                                [atens3_val], Diagonal)
-        atens3_diag = Diagonal(1, 1, 2)(atens3)
+                                [atens3_val], ExtractDiag)
+        atens3_diag = ExtractDiag(1, 1, 2)(atens3)
        self._compile_and_check([atens3], [atens3_diag],
-                                [atens3_val], Diagonal)
-        atens3_diag = Diagonal(1, 2, 0)(atens3)
+                                [atens3_val], ExtractDiag)
+        atens3_diag = ExtractDiag(1, 2, 0)(atens3)
        self._compile_and_check([atens3], [atens3_diag],
-                                [atens3_val], Diagonal)
+                                [atens3_val], ExtractDiag)

        # Diag
        advec = dvector()