提交 871b89ef authored 作者: Zhouhan LIN's avatar Zhouhan LIN

move to the new backend

上级 2e699b53
...@@ -1076,3 +1076,107 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) { ...@@ -1076,3 +1076,107 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) {
return 0; return 0;
} }
""" % locals() """ % locals()
class GpuDiagonal(GpuOp):
    """Return the specified diagonal of a CUDA ndarray.

    The two axes ``axis1`` and ``axis2`` are removed from the input and a
    new last axis holding the diagonal is appended, which is the layout
    ``numpy.diagonal`` uses.

    Parameters
    ----------
    offset : int
        Offset of the diagonal from the main diagonal. ``0`` is the main
        diagonal; positive values shift along ``axis2``, negative values
        along ``axis1``.
    axis1 : int
        First axis of the 2-D sub-arrays the diagonal is taken from.
    axis2 : int
        Second axis of the 2-D sub-arrays the diagonal is taken from.
    view : bool
        If True, the output is a strided view on the input (and
        ``view_map`` is declared accordingly); otherwise the result is
        copied.

    """
    __props__ = ("offset", "axis1", "axis2", "view")

    def __init__(self, offset=0, axis1=0, axis2=1, view=False):
        self.view = view
        if self.view:
            # Declare that output 0 may alias input 0.
            self.view_map = {0: [0]}
        self.offset = offset
        self.axis1 = axis1
        self.axis2 = axis2

    def make_node(self, _x):
        """Build the Apply node; the input must have at least 2 dimensions.

        Raises
        ------
        ValueError
            If the input has fewer than two dimensions.

        """
        x = as_cuda_ndarray_variable(_x)
        if x.ndim < 2:
            raise ValueError('Diagonal needs an input with 2 or more '
                             'dimensions', x)
        # Drop the broadcast pattern of the two diagonal axes and append a
        # non-broadcastable entry for the new trailing diagonal axis.
        axis_small, axis_large = sorted((self.axis1, self.axis2))
        broadcastable = x.broadcastable[:axis_small] + \
            x.broadcastable[axis_small + 1:axis_large] + \
            x.broadcastable[axis_large + 1:] + (False,)
        return Apply(self, [x], [x.type.__class__(
            dtype=x.dtype,
            broadcastable=broadcastable)()])

    def perform(self, node, inputs, outputs):
        """Compute the diagonal by slicing, dimshuffling and re-striding."""
        (x,) = inputs
        (z,) = outputs
        offset = self.offset
        dim1 = x.shape[self.axis1]
        dim2 = x.shape[self.axis2]

        # Length of the requested diagonal, clamped at zero. This is the
        # same quantity infer_shape() computes symbolically.
        if offset >= 0:
            diag_size = max(0, min(dim1, dim2 - offset))
        else:
            diag_size = max(0, min(dim1 + offset, dim2))

        # Empty input, or an offset that falls entirely outside the matrix:
        # there is nothing to slice, so allocate an output of the right
        # shape directly instead of indexing out of bounds.
        if x.size == 0 or diag_size == 0:
            out_shape = [d for i, d in enumerate(x.shape)
                         if i not in (self.axis1, self.axis2)]
            out_shape.append(diag_size)
            z[0] = node.outputs[0].type.value_zeros(tuple(out_shape))
            return

        # step 1) slicing on axis1 and axis2.
        # The integer index on slice_axis selects the first element of the
        # diagonal; the slice on stride_axis keeps diag_size entries.
        if offset >= 0:
            stride_axis, slice_axis = self.axis1, self.axis2
        else:
            slice_axis, stride_axis = self.axis1, self.axis2
        slicer = [numpy.s_[:], ] * x.ndim
        slicer[stride_axis] = numpy.s_[:diag_size]
        slicer[slice_axis] = abs(offset)
        sliced = x[tuple(slicer)]

        # step 2) Swap stride_axis to the last dim because we want the dim
        # on which the diags are extracted to be listed as the last dim of
        # the tensor. This is consistent with the interface of
        # numpy.diagonal.
        if slice_axis < stride_axis:
            # The integer index removed slice_axis, shifting stride_axis.
            stride_axis -= 1
        # list(...) so that the concatenation below also works when range()
        # does not return a list.
        dims = list(range(sliced.ndim))
        new_dim_order = tuple(dims[:stride_axis] +
                              dims[stride_axis + 1:] +
                              [stride_axis, ])
        rval = cuda_ndarray.cuda_ndarray.dimshuffle(sliced, new_dim_order)

        # step 3) modify the strides in the last axis, such that rval
        # becomes a view on the diagonal: one step along the last axis
        # advances by one element on both axis1 and axis2.
        other_strides = tuple([d for i, d in enumerate(x.strides)
                               if i not in (self.axis1, self.axis2)])
        rval.strides = other_strides + \
            (x.strides[self.axis1] + x.strides[self.axis2], )

        if self.view:
            z[0] = rval
        else:
            z[0] = rval.copy()

    def grad(self, inputs, gout):
        """The gradient is not implemented for this op."""
        (input_x,) = inputs
        return [grad_not_implemented(self, 0, input_x)]

    def infer_shape(self, node, shapes):
        """Return the symbolic output shape: remaining dims + diagonal size."""
        in_shape, = shapes
        dim1 = in_shape[self.axis1]
        dim2 = in_shape[self.axis2]
        out_shape = [d for i, d in enumerate(in_shape)
                     if i not in (self.axis1, self.axis2)]
        # The following logic is inspired by C code of PyArray_Diagonal().
        offset = self.offset
        if offset > 0:
            diag_size = theano.tensor.clip(dim2 - offset, 0, dim1)
        elif offset < 0:
            diag_size = theano.tensor.clip(dim1 + offset, 0, dim2)
        else:
            diag_size = theano.tensor.minimum(dim1, dim2)
        out_shape.append(diag_size)
        return [tuple(out_shape)]
...@@ -126,3 +126,27 @@ def test_adv_subtensor(): ...@@ -126,3 +126,27 @@ def test_adv_subtensor():
rval = f(idx1_val, idx2_val) rval = f(idx1_val, idx2_val)
rep = xval[idx1_val, None, slice(0, 2, 1), idx2_val, None] rep = xval[idx1_val, None, slice(0, 2, 1), idx2_val, None]
assert numpy.allclose(rval, rep) assert numpy.allclose(rval, rep)
class test_gpudiagonal(unittest.TestCase):
    """Check GpuDiagonal results against ``ndarray.diagonal``."""

    def test_matrix(self):
        # 7x11 float32 matrix; exercise the default, a positive and a
        # negative offset.
        sym_x = cuda.fmatrix()
        data = numpy.arange(77).reshape(7, 11).astype('float32')
        for args in ((), (2,), (-3,)):
            diag_fn = theano.function([sym_x], B.GpuDiagonal(*args)(sym_x),
                                      mode=mode_with_gpu)
            assert numpy.allclose(diag_fn(data), data.diagonal(*args))

    def test_tensor(self):
        # 4-d input with several (offset, axis1, axis2) combinations.
        sym_x = theano.tensor.ftensor4()
        data = numpy.arange(30107).reshape(7, 11, 17, 23).astype('float32')
        cases = [
            (1, 0, 1), (-1, 0, 1), (0, 1, 0), (-2, 1, 0),
            (-3, 1, 0), (-2, 2, 0), (3, 3, 0), (-1, 3, 2),
            (2, 2, 3), (-1, 2, 1), (1, 3, 1), (-1, 1, 3)]
        for offset, axis1, axis2 in cases:
            computed = B.GpuDiagonal(offset, axis1, axis2)(sym_x).eval(
                {sym_x: data})
            expected = data.diagonal(offset, axis1, axis2)
            assert numpy.allclose(computed, expected)
...@@ -6026,7 +6026,7 @@ numpy_diagonal_return_view = numpy.may_share_memory(numpy.diagonal(x), x) ...@@ -6026,7 +6026,7 @@ numpy_diagonal_return_view = numpy.may_share_memory(numpy.diagonal(x), x)
del x del x
class Diagonal(Op): class ExtractDiag(Op):
"""Return specified diagonals. """Return specified diagonals.
Parameters Parameters
...@@ -6040,10 +6040,18 @@ class Diagonal(Op): ...@@ -6040,10 +6040,18 @@ class Diagonal(Op):
A vector representing the diagonal elements. A vector representing the diagonal elements.
""" """
__props__ = ("offset", "axis1", "axis2") __props__ = ("offset", "axis1", "axis2", "view")
def __init__(self, offset=0, axis1=0, axis2=1): def __init__(self, offset=0, axis1=0, axis2=1, view=False):
if numpy_diagonal_return_view: self.view = view
if self.view and not numpy_diagonal_return_view:
warnings.warn("View will forced to False. Diagonal property view is "
"set to True but numpy version %s and prior versions of "
"numpy.diagonal() do not return a view. Update "
"numpy to use Diagonal(view=True)" %
numpy.version.version)
self.view = False
if self.view:
self.view_map = {0: [0]} self.view_map = {0: [0]}
self.offset = offset self.offset = offset
self.axis1 = axis1 self.axis1 = axis1
...@@ -6051,9 +6059,13 @@ class Diagonal(Op): ...@@ -6051,9 +6059,13 @@ class Diagonal(Op):
def make_node(self, x): def make_node(self, x):
x = as_tensor_variable(x) x = as_tensor_variable(x)
assert x.ndim >= 2
return Apply(self, [x], [tensor(dtype=x.dtype, if x.ndim < 2:
broadcastable=[False] * (x.ndim - 1))]) raise ValueError('Diagonal needs an input with 2 or more '
'dimensions', x)
return Apply(self, [x], [x.type.__class__(
dtype=x.dtype,
broadcastable=[False] * (x.ndim - 1))()])
def perform(self, node, inputs, outputs): def perform(self, node, inputs, outputs):
(x,) = inputs (x,) = inputs
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论