move to the new backend

871b89ef · Zhouhan LIN · 2e699b53 · 871b89ef · 871b89ef · 871b89ef
--- a/theano/gpuarray/subtensor.py
+++ b/theano/gpuarray/subtensor.py
@@ -1076,3 +1076,107 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) {
          return 0;
        }
        """ % locals()
+class GpuDiagonal(GpuOp):
+    """Extract one diagonal of a GPU tensor, following ``numpy.diagonal``.
+    ``axis1`` and ``axis2`` are removed from the input and the diagonal
+    taken over them becomes the last dimension of the output.
+    Parameters
+    ----------
+    offset : int
+        Diagonal to extract: 0 is the main diagonal, a positive value moves
+        along ``axis2``, a negative value moves along ``axis1``.
+    axis1, axis2 : int
+        The two axes spanning the 2-D sub-arrays the diagonal is taken
+        from.  The code indexes with them directly, so it assumes they are
+        non-negative.
+    view : bool
+        If True the output aliases the input (``view_map`` is set),
+        otherwise ``perform`` returns a copy.
+    """
+    # NOTE(review): the diff header places this class in
+    # theano/gpuarray/subtensor.py, yet GpuOp, as_cuda_ndarray_variable and
+    # cuda_ndarray look like names of the old CUDA backend -- confirm they
+    # are in scope in this module.
+    __props__ = ("offset", "axis1", "axis2", "view")
+    def __init__(self, offset=0, axis1=0, axis2=1, view=False):
+        self.view = view
+        if self.view:
+            # Output 0 shares memory with input 0 when a view is returned.
+            self.view_map = {0: [0]}
+        self.offset = offset
+        self.axis1 = axis1
+        self.axis2 = axis2
+    def _diag_length(self, dim1, dim2):
+        """Return how many elements lie on the requested diagonal.
+        Applies the same clipping as ``infer_shape``: the result is never
+        negative, so an offset pointing outside the matrix yields 0.
+        """
+        if self.offset >= 0:
+            return int(max(0, min(dim1, dim2 - self.offset)))
+        return int(max(0, min(dim1 + self.offset, dim2)))
+    def make_node(self, _x):
+        """Build the Apply node; raises ValueError for inputs below 2-D."""
+        x = as_cuda_ndarray_variable(_x)
+        if x.ndim < 2:
+            raise ValueError('Diagonal needs an input with 2 or more '
+                             'dimensions', x)
+        axis_small, axis_large = sorted((self.axis1, self.axis2))
+        # Drop the two diagonal axes and append a non-broadcastable one
+        # for the diagonal itself.
+        broadcastable = x.broadcastable[:axis_small] + \
+            x.broadcastable[axis_small + 1:axis_large] + \
+            x.broadcastable[axis_large + 1:] + (False,)
+        return Apply(self, [x], [x.type.__class__(
+            dtype=x.dtype,
+            broadcastable=broadcastable)()])
+    def perform(self, node, inputs, outputs):
+        """Compute the diagonal of ``inputs[0]`` into ``outputs[0][0]``."""
+        (x,) = inputs
+        (z,) = outputs
+        diag_size = self._diag_length(x.shape[self.axis1],
+                                      x.shape[self.axis2])
+        # Nothing to gather: either the input is empty or the offset falls
+        # outside the matrix.  Return an empty result of the right shape
+        # (the shape infer_shape announces) instead of indexing out of
+        # bounds below.
+        if x.size == 0 or diag_size == 0:
+            out_shape = [d for i, d in enumerate(x.shape)
+                         if i not in (self.axis1, self.axis2)]
+            out_shape.append(diag_size)
+            z[0] = node.outputs[0].type.value_zeros(tuple(out_shape))
+            return
+        # step 1) slicing on axis1 and axis2: fix the first element of the
+        # diagonal on slice_axis and keep diag_size entries on stride_axis.
+        if self.offset >= 0:
+            stride_axis, slice_axis = self.axis1, self.axis2
+        else:
+            slice_axis, stride_axis = self.axis1, self.axis2
+        slicer = [numpy.s_[:], ] * x.ndim
+        slicer[stride_axis] = numpy.s_[:diag_size]
+        slicer[slice_axis] = abs(self.offset)
+        sliced = x[tuple(slicer)]
+        # step 2) Swap stride_axis to the last dim because we want the dim
+        # on which the diagonal is extracted to be the last dim of the
+        # tensor, consistent with the interface of numpy.diagonal.
+        if slice_axis < stride_axis:
+            # The integer index above removed slice_axis from the result.
+            stride_axis -= 1
+        # list(...) is required: on Python 3 range objects cannot be
+        # concatenated with ``+``.
+        new_dim_order = list(range(sliced.ndim))
+        new_dim_order = tuple(new_dim_order[:stride_axis] +
+                              new_dim_order[stride_axis + 1:] +
+                              [stride_axis, ])
+        rval = cuda_ndarray.cuda_ndarray.dimshuffle(sliced, new_dim_order)
+        # step 3) modify the strides in the last axis, such that rval
+        # becomes a view on the diagonal: one step along the diagonal is
+        # one step along axis1 plus one step along axis2.
+        other_strides = tuple([d for i, d in enumerate(x.strides)
+                               if i not in (self.axis1, self.axis2)])
+        rval.strides = other_strides + \
+            (x.strides[self.axis1] + x.strides[self.axis2], )
+        if self.view:
+            z[0] = rval
+        else:
+            z[0] = rval.copy()
+    def grad(self, inputs, gout):
+        """The gradient is not implemented for this op."""
+        (input_x,) = inputs
+        return [grad_not_implemented(self, 0, input_x)]
+    def infer_shape(self, node, shapes):
+        """Return the symbolic output shape: remaining dims + diagonal."""
+        in_shape, = shapes
+        dim1 = in_shape[self.axis1]
+        dim2 = in_shape[self.axis2]
+        out_shape = [d for i, d in enumerate(in_shape)
+                     if i not in (self.axis1, self.axis2)]
+        # The following logic is inspired by C code of PyArray_Diagonal().
+        offset = self.offset
+        if offset > 0:
+            diag_size = theano.tensor.clip(dim2 - offset, 0, dim1)
+        elif offset < 0:
+            diag_size = theano.tensor.clip(dim1 + offset, 0, dim2)
+        else:
+            diag_size = theano.tensor.minimum(dim1, dim2)
+        out_shape.append(diag_size)
+        return [tuple(out_shape)]
--- a/theano/gpuarray/tests/test_subtensor.py
+++ b/theano/gpuarray/tests/test_subtensor.py
@@ -126,3 +126,27 @@ def test_adv_subtensor():
    rval = f(idx1_val, idx2_val)
    rep = xval[idx1_val, None, slice(0, 2, 1), idx2_val, None]
    assert numpy.allclose(rval, rep)
+class test_gpudiagonal(unittest.TestCase):
+    """Compare GpuDiagonal against ``numpy.ndarray.diagonal``."""
+    def test_matrix(self):
+        """Main, upper (+2) and lower (-3) diagonals of a 7x11 matrix."""
+        x = cuda.fmatrix()
+        np_x = numpy.arange(77).reshape(7, 11).astype('float32')
+        # Each tuple holds the positional arguments given both to the op
+        # and to the numpy reference; () exercises the default offset.
+        for diag_args in ((), (2,), (-3,)):
+            fn = theano.function([x], B.GpuDiagonal(*diag_args)(x),
+                                 mode=mode_with_gpu)
+            assert numpy.allclose(fn(np_x), np_x.diagonal(*diag_args))
+    def test_tensor(self):
+        """Assorted (offset, axis1, axis2) combinations on a 4-D tensor."""
+        x = theano.tensor.ftensor4()
+        np_x = numpy.arange(30107).reshape(7, 11, 17, 23).astype('float32')
+        cases = [
+            (1, 0, 1), (-1, 0, 1), (0, 1, 0), (-2, 1, 0),
+            (-3, 1, 0), (-2, 2, 0), (3, 3, 0), (-1, 3, 2),
+            (2, 2, 3), (-1, 2, 1), (1, 3, 1), (-1, 1, 3)]
+        for case in cases:
+            offset, axis1, axis2 = case
+            expected = np_x.diagonal(offset, axis1, axis2)
+            result = B.GpuDiagonal(offset, axis1, axis2)(x).eval({x: np_x})
+            assert numpy.allclose(result, expected)
--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -6026,7 +6026,7 @@ numpy_diagonal_return_view = numpy.may_share_memory(numpy.diagonal(x), x)
 del x
-class Diagonal(Op):
+class ExtractDiag(Op):
    """Return specified diagonals.
    Parameters
@@ -6040,10 +6040,18 @@ class Diagonal(Op):
        A vector representing the diagonal elements.
    """
-    __props__ = ("offset", "axis1", "axis2")
+    __props__ = ("offset", "axis1", "axis2", "view")
-    def __init__(self, offset=0, axis1=0, axis2=1):
+    def __init__(self, offset=0, axis1=0, axis2=1, view=False):
-        if numpy_diagonal_return_view:
+        self.view = view
+        if self.view and not numpy_diagonal_return_view:
+            warnings.warn("View will forced to False. Diagonal property view is "
+                          "set to True but numpy version %s and prior versions of "
+                          "numpy.diagonal() do not return a view. Update "
+                          "numpy to use Diagonal(view=True)" %
+                          numpy.version.version)
+            self.view = False
+        if self.view:
            self.view_map = {0: [0]}
        self.offset = offset
        self.axis1 = axis1
@@ -6051,9 +6059,13 @@ class Diagonal(Op):
    def make_node(self, x):
        x = as_tensor_variable(x)
-        assert x.ndim >= 2
-        return Apply(self, [x], [tensor(dtype=x.dtype,
+        if x.ndim < 2:
-                                        broadcastable=[False] * (x.ndim - 1))])
+            raise ValueError('Diagonal needs an input with 2 or more '
+                             'dimensions', x)
+        return Apply(self, [x], [x.type.__class__(
+            dtype=x.dtype,
+            broadcastable=[False] * (x.ndim - 1))()])
    def perform(self, node, inputs, outputs):
        (x,) = inputs