提交 f766415c，作者：Shawn Tan

GpuAllocDiag implementation. Current tests work fine.

上级 268207b4
...@@ -1357,34 +1357,71 @@ class GpuExtractDiag(Op): ...@@ -1357,34 +1357,71 @@ class GpuExtractDiag(Op):
class GpuAllocDiag(Op): class GpuAllocDiag(Op):
__props__ = ("offset",) __props__ = ("offset", "axis1", "axis2")
def __init__(self, offset=0): def __init__(self, offset=0, axis1=0, axis2=1):
self.offset = offset self.offset = offset
self.axis1 = axis1
self.axis2 = axis2
def make_node(self, _x): def make_node(self, diag):
ctx_name = infer_context_name(_x) ctx_name = infer_context_name(diag)
x = as_gpuarray_variable(_x, ctx_name) diag = as_gpuarray_variable(diag, ctx_name)
if diag.type.ndim < 1:
if x.ndim != 1: raise ValueError('AllocDiag needs an input with 1 or more '
raise ValueError('AllocDiag argument must be a vector!', x) 'dimensions', diag.type)
return gof.Apply(
return gof.Apply(self, [x], [x.type.clone(broadcastable=(False, False))()]) self, [diag],
[diag.type.__class__(
dtype=diag.dtype,
broadcastable=[False] * (diag.ndim + 1))()]
)
def perform(self, node, inputs, outputs): def perform(self, node, inputs, outputs):
(x,) = inputs (x,) = inputs
(z,) = outputs (z,) = outputs
axis1 = np.minimum(self.axis1, self.axis2)
axis2 = np.maximum(self.axis1, self.axis2)
offset = self.offset
dim = x.shape[0] + abs(self.offset) result_shape = x.shape[:-1] + (x.shape[-1] + abs(offset),) * 2
z[0] = gpuarray.zeros((dim, dim), dtype=x.dtype, context=x.context) result_buffer_shape = ((np.prod(x.shape[:-1]).astype(np.int64),) +
((x.shape[-1] + abs(offset)) ** 2,))
if self.offset <= 0: # diag in the lower triangle result_buffer = gpuarray.zeros(result_buffer_shape,
diag_z = z[0][-self.offset, :(dim + self.offset)] dtype=x.dtype,
else: # diag in the upper triangle context=x.context)
diag_z = z[0][:(dim - self.offset), self.offset]
diag_z.strides = (sum(z[0].strides),) if offset != 0:
row_size = x.shape[-1] + abs(offset)
diag_z[:] = x[:] if offset >= 0:
start_flattened_offset = abs(offset)
end_flattened_offset = row_size * abs(offset)
else:
start_flattened_offset = row_size * abs(offset)
end_flattened_offset = abs(offset)
diag_view = result_buffer[:, start_flattened_offset:-end_flattened_offset:row_size + 1]
# print("offset", offset)
# print("buffer shape:", result_buffer.shape)
# print("result_buffer[%d:%d:%d]" % (start_flattened_offset, -end_flattened_offset, row_size + 1), diag_view.shape)
# print("input_shape:", x.shape)
else:
diag_view = result_buffer[:, ::x.shape[-1] + 1]
diag_view[:] = x.reshape(diag_view.shape)[:]
result = result_buffer.reshape(result_shape)
# print(result)
# Fill in final 2 axes with x
if len(x.shape) > 1:
# Re-order axes so they correspond to diagonals at axis1, axis2
axes = list(range(len(x.shape[:-1])))
last_idx = axes[-1]
axes = axes[:axis1] + [last_idx + 1] + axes[axis1:]
axes = axes[:axis2] + [last_idx + 2] + axes[axis2:]
result = result.transpose(axes)
z[0] = result
def grad(self, inputs, gout): def grad(self, inputs, gout):
(gz,) = gout (gz,) = gout
......
...@@ -6573,9 +6573,10 @@ class AllocDiag(Op): ...@@ -6573,9 +6573,10 @@ class AllocDiag(Op):
result = np.zeros(result_shape, dtype=x.dtype) result = np.zeros(result_shape, dtype=x.dtype)
# Create slice for diagonal in final 2 axes # Create slice for diagonal in final 2 axes
idxs = np.arange(x.shape[-1])
diagonal_slice = ((len(result_shape) - 2) * [slice(None)] + diagonal_slice = ((len(result_shape) - 2) * [slice(None)] +
[np.arange(x.shape[-1]) + np.maximum(0, -offset), [idxs + np.maximum(0, -offset),
np.arange(x.shape[-1]) + np.maximum(0, offset)]) idxs + np.maximum(0, offset)])
# Fill in final 2 axes with x # Fill in final 2 axes with x
result[diagonal_slice] = x result[diagonal_slice] = x
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论