提交 de2fba59 authored 作者: abergeron's avatar abergeron 提交者: GitHub

Merge pull request #5247 from hantek/extractdiag

Moving ExtractDiag op to the new backend
......@@ -6,7 +6,9 @@ from theano import Op
import theano.tensor as T
from theano.gradient import DisconnectedType
from theano.gpuarray import (basic_ops, GpuArrayType)
from .basic_ops import (gpu_contiguous, as_gpuarray_variable,
infer_context_name)
from .type import GpuArrayType
import theano.tensor.fft
from .opt import register_opt, op_lifter, register_opt2
......@@ -58,9 +60,8 @@ class CuRFFTOp(Op):
if not pycuda_available:
raise RuntimeError("pycuda is needed for CuFFTOp")
inp = basic_ops.gpu_contiguous(
basic_ops.as_gpuarray_variable(inp,
basic_ops.infer_context_name(inp)))
inp = gpu_contiguous(as_gpuarray_variable(inp,
infer_context_name(inp)))
# If no shape is provided as input, default to input data shape.
if s is None:
......@@ -183,9 +184,8 @@ class CuIRFFTOp(Op):
if not pycuda_available:
raise RuntimeError("pycuda is needed for CuIFFTOp")
inp = basic_ops.gpu_contiguous(
basic_ops.as_gpuarray_variable(inp,
basic_ops.infer_context_name(inp)))
inp = gpu_contiguous(as_gpuarray_variable(inp,
infer_context_name(inp)))
# If no shape is provided as input, calculate shape assuming even real transform.
if s is None:
......
......@@ -7,6 +7,8 @@ from six import integer_types
from six.moves import StringIO
from theano import tensor, gof, Op
from theano.gradient import grad_not_implemented
import theano.tensor as T
from theano.tensor.subtensor import IncSubtensor, Subtensor, get_idx_list
try:
......@@ -1076,3 +1078,108 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) {
return 0;
}
""" % locals()
class GpuDiagonal(Subtensor):
__props__ = ("offset", "axis1", "axis2", "view")
def __init__(self, offset=0, axis1=0, axis2=1, view=False):
self.view = view
if self.view:
self.view_map = {0: [0]}
self.offset = offset
self.axis1 = axis1
self.axis2 = axis2
def make_node(self, _x):
ctx_name = infer_context_name(_x)
x = as_gpuarray_variable(_x, ctx_name)
if x.ndim < 2:
raise ValueError('Diagonal needs an input with 2 or more '
'dimensions', x)
axis_small, axis_large = sorted((self.axis1, self.axis2))
broadcastable = x.broadcastable[:axis_small] + \
x.broadcastable[axis_small + 1:axis_large] + \
x.broadcastable[axis_large + 1:] + (False,)
return gof.Apply(self, [x], [x.type.__class__(
dtype=x.dtype,
broadcastable=broadcastable)()])
def perform(self, node, inputs, outputs):
(x,) = inputs
(z,) = outputs
# zero-dimensional matrices ...
if x.size == 0:
out_shape = [d for i, d in enumerate(x.shape)
if i not in (self.axis1, self.axis2)]
diag_size = numpy.min((x.shape[self.axis1], x.shape[self.axis2]))
out_shape.append(diag_size)
z[0] = node.outputs[0].type.value_zeros(tuple(out_shape))
return
# step 1) slicing on axis1 and axis2.
if self.offset >= 0:
stride_axis, slice_axis = self.axis1, self.axis2
else:
slice_axis, stride_axis = self.axis1, self.axis2
small_axis, large_axis = sorted((x.shape[self.axis1],
x.shape[self.axis2]))
if x.shape[stride_axis] < x.shape[slice_axis]:
# in the bigger triangle
numstride = small_axis - numpy.max((
0, small_axis + numpy.abs(self.offset) - large_axis))
else:
# in the smaller triangle
numstride = small_axis - numpy.abs(self.offset)
slicer = [numpy.s_[:], ] * x.ndim
slicer[stride_axis] = numpy.s_[:numstride]
slicer[slice_axis] = numpy.abs(self.offset)
slicer = tuple(slicer)
# step 2) Swap stride_axis to the last dim because we want the dim on
# which the diags extracted be listed as the last dim of the tensor.
# This is also in consistence with the interface of numpy.diagonal.
if slice_axis < stride_axis:
stride_axis -= 1
new_dim_order = range(x[slicer].ndim)
new_dim_order = tuple(new_dim_order[:stride_axis] +
new_dim_order[stride_axis + 1:] +
[stride_axis, ])
rval = x[slicer].transpose(new_dim_order)
# step 3) modify the strides in the last axis, such that rval becomes
# a view on the diagonal.
other_strides = tuple([d for i, d in enumerate(x.strides)
if i not in (self.axis1, self.axis2)])
rval.strides = other_strides + \
(x.strides[self.axis1] + x.strides[self.axis2], )
if self.view:
z[0] = rval
else:
z[0] = rval.copy()
def grad(self, inputs, gout):
(input_x,) = inputs
return [grad_not_implemented(self, 0, input_x)]
def infer_shape(self, node, shapes):
in_shape, = shapes
dim1 = in_shape[self.axis1]
dim2 = in_shape[self.axis2]
out_shape = [d for i, d in enumerate(in_shape)
if i not in (self.axis1, self.axis2)]
# The following logic is inspired by C code of PyArray_Diagonal().
offset = self.offset
if offset > 0:
diag_size = T.clip(dim2 - offset, 0, dim1)
elif offset < 0:
diag_size = T.clip(dim1 + offset, 0, dim2)
else:
diag_size = T.minimum(dim1, dim2)
out_shape.append(diag_size)
return [tuple(out_shape)]
from __future__ import absolute_import, print_function, division
import numpy
import unittest
import theano
from theano import tensor
......@@ -11,7 +12,8 @@ from ..elemwise import GpuDimShuffle
from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor1,
GpuAdvancedSubtensor,
GpuAdvancedIncSubtensor1)
GpuAdvancedIncSubtensor1,
GpuDiagonal)
from ..type import gpuarray_shared_constructor
from .config import mode_with_gpu
......@@ -126,3 +128,26 @@ def test_adv_subtensor():
rval = f(idx1_val, idx2_val)
rep = xval[idx1_val, None, slice(0, 2, 1), idx2_val, None]
assert numpy.allclose(rval, rep)
class test_gpudiagonal(unittest.TestCase):
def test_matrix(self):
x = tensor.matrix()
np_x = numpy.arange(77).reshape(7, 11).astype(theano.config.floatX)
fn = theano.function([x], GpuDiagonal()(x), mode=mode_with_gpu)
assert numpy.allclose(fn(np_x), np_x.diagonal())
fn = theano.function([x], GpuDiagonal(2)(x), mode=mode_with_gpu)
assert numpy.allclose(fn(np_x), np_x.diagonal(2))
fn = theano.function([x], GpuDiagonal(-3)(x), mode=mode_with_gpu)
assert numpy.allclose(fn(np_x), np_x.diagonal(-3))
def test_tensor(self):
x = tensor.tensor4()
np_x = numpy.arange(30107).reshape(7, 11, 17, 23).astype(theano.config.floatX)
for offset, axis1, axis2 in [
(1, 0, 1), (-1, 0, 1), (0, 1, 0), (-2, 1, 0),
(-3, 1, 0), (-2, 2, 0), (3, 3, 0), (-1, 3, 2),
(2, 2, 3), (-1, 2, 1), (1, 3, 1), (-1, 1, 3)]:
assert numpy.allclose(
GpuDiagonal(offset, axis1, axis2)(x).eval({x: np_x}),
np_x.diagonal(offset, axis1, axis2))
......@@ -6026,7 +6026,7 @@ numpy_diagonal_return_view = numpy.may_share_memory(numpy.diagonal(x), x)
del x
class Diagonal(Op):
class ExtractDiag(Op):
"""Return specified diagonals.
Parameters
......@@ -6040,10 +6040,18 @@ class Diagonal(Op):
A vector representing the diagonal elements.
"""
__props__ = ("offset", "axis1", "axis2")
__props__ = ("offset", "axis1", "axis2", "view")
def __init__(self, offset=0, axis1=0, axis2=1):
if numpy_diagonal_return_view:
def __init__(self, offset=0, axis1=0, axis2=1, view=False):
self.view = view
if self.view and not numpy_diagonal_return_view:
warnings.warn("View will forced to False. ExtractDiag property view is "
"set to True but numpy version %s and prior versions of "
"numpy.diagonal() do not return a view. Update "
"numpy to use ExtractDiag(view=True)" %
numpy.version.version)
self.view = False
if self.view:
self.view_map = {0: [0]}
self.offset = offset
self.axis1 = axis1
......@@ -6051,9 +6059,13 @@ class Diagonal(Op):
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim >= 2
return Apply(self, [x], [tensor(dtype=x.dtype,
broadcastable=[False] * (x.ndim - 1))])
if x.ndim < 2:
raise ValueError('ExtractDiag needs an input with 2 or more '
'dimensions', x)
return Apply(self, [x], [x.type.__class__(
dtype=x.dtype,
broadcastable=[False] * (x.ndim - 1))()])
def perform(self, node, inputs, outputs):
(x,) = inputs
......@@ -6086,7 +6098,7 @@ class Diagonal(Op):
def diagonal(a, offset=0, axis1=0, axis2=1):
if (offset, axis1, axis2) == (0, 0, 1):
return theano.tensor.nlinalg.extract_diag(a)
return Diagonal(offset, axis1, axis2)(a)
return ExtractDiag(offset, axis1, axis2)(a)
class Diag(Op):
......
......@@ -45,7 +45,7 @@ from theano.tensor import (_shared, wvector, bvector,
tile, patternbroadcast, Eye, Shape, Dot, PermuteRowElements,
ScalarFromTensor, TensorFromScalar, dtensor4, Rebroadcast, Alloc,
dtensor3, SpecifyShape, Mean,
itensor3, Tile, switch, Diagonal, Diag,
itensor3, Tile, switch, ExtractDiag, Diag,
nonzero, flatnonzero, nonzero_values,
stacklists, DimShuffle, hessian, ptp, power,
swapaxes, choose, Choose, NoneConst, AllocEmpty,
......@@ -7427,27 +7427,27 @@ class TestInferShape(utt.InferShapeTester):
[Tri()(aiscal, biscal, ciscal)],
[3, 5, 0], Tri)
# Diagonal
# ExtractDiag
atens3 = tensor3()
atens3_val = rand(4, 5, 3)
atens3_diag = Diagonal()(atens3)
atens3_diag = ExtractDiag()(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(1)(atens3)
[atens3_val], ExtractDiag)
atens3_diag = ExtractDiag(1)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(-1)(atens3)
[atens3_val], ExtractDiag)
atens3_diag = ExtractDiag(-1)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(1, 0, 2)(atens3)
[atens3_val], ExtractDiag)
atens3_diag = ExtractDiag(1, 0, 2)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(1, 1, 2)(atens3)
[atens3_val], ExtractDiag)
atens3_diag = ExtractDiag(1, 1, 2)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(1, 2, 0)(atens3)
[atens3_val], ExtractDiag)
atens3_diag = ExtractDiag(1, 2, 0)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
[atens3_val], ExtractDiag)
# Diag
advec = dvector()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论