提交 cd021d10 authored 作者: hantek's avatar hantek

implement AllocDiag with offsets

上级 86c150d8
...@@ -21,6 +21,7 @@ from .type import GpuArrayType, gpu_context_type ...@@ -21,6 +21,7 @@ from .type import GpuArrayType, gpu_context_type
from .basic_ops import (as_gpuarray_variable, HideC, GpuKernelBase, Kernel, from .basic_ops import (as_gpuarray_variable, HideC, GpuKernelBase, Kernel,
infer_context_name, gpu_contiguous) infer_context_name, gpu_contiguous)
import pdb
iadd_reg = {} iadd_reg = {}
...@@ -1187,7 +1188,7 @@ class GpuDiagonal(Subtensor): ...@@ -1187,7 +1188,7 @@ class GpuDiagonal(Subtensor):
class GpuAllocDiag(Subtensor):
    """Allocate a square GPU matrix with vector `x` on its k-th diagonal.

    GPU counterpart of ``theano.tensor.AllocDiag``: for a length-n input
    vector and diagonal ``offset`` k, the output is an
    (n + |k|) x (n + |k|) zero matrix whose k-th diagonal holds the
    vector's values — the same convention as ``numpy.diag``.
    """

    __props__ = ("offset",)

    def __init__(self, offset=0):
        # offset > 0: diagonal above the main one (upper triangle);
        # offset < 0: diagonal below it (lower triangle).
        self.offset = offset

    def make_node(self, _x):
        # NOTE(review): the first lines were collapsed in the diff view;
        # this context-name/conversion preamble follows the standard
        # gpuarray Op pattern — confirm against the full file.
        ctx_name = infer_context_name(_x)
        x = as_gpuarray_variable(_x, ctx_name)
        if x.ndim != 1:
            raise ValueError('AllocDiag argument must be a vector!', x)
        # The output is always a full (non-broadcastable) matrix, never
        # the input vector's 1-D broadcastable pattern.
        return gof.Apply(self, [x], [x.type.__class__(
            dtype=x.dtype, broadcastable=(False, False))()])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (z,) = outputs
        # Output is square; |offset| extra rows/cols make room for an
        # off-main diagonal.
        dim = x.shape[0] + abs(self.offset)
        result = gpuarray.zeros((dim, dim), dtype=x.dtype,
                                context=x.context)
        if self.offset <= 0:  # diag in the lower triangle
            # Row -offset, first (dim + offset) == len(x) elements.
            diag_view = result[-self.offset, :(dim + self.offset)]
        else:  # diag in the upper triangle
            # Column offset, first (dim - offset) == len(x) elements.
            diag_view = result[:(dim - self.offset), self.offset]
        # Re-striding the 1-D view by (row stride + col stride) turns it
        # into a view over the k-th diagonal, which we fill from x.
        diag_view.strides = (sum(result.strides),)
        diag_view[:] = x[:]
        # Store into the output cell; rebinding the local name would
        # silently discard the result.
        z[0] = result

    def grad(self, inputs, gout):
        (input_x,) = inputs
        return [grad_not_implemented(self, 0, input_x)]

    def infer_shape(self, node, shapes):
        dim = shapes[0][0] + abs(self.offset)
        # Single output -> a list containing one 2-tuple shape.
        return [(dim, dim)]
...@@ -20,6 +20,8 @@ from ..type import gpuarray_shared_constructor ...@@ -20,6 +20,8 @@ from ..type import gpuarray_shared_constructor
from .config import mode_with_gpu from .config import mode_with_gpu
import pdb
class G_subtensor(test_subtensor.T_subtensor): class G_subtensor(test_subtensor.T_subtensor):
def shortDescription(self): def shortDescription(self):
...@@ -200,11 +202,11 @@ class test_gpudiagonal(unittest.TestCase): ...@@ -200,11 +202,11 @@ class test_gpudiagonal(unittest.TestCase):
class test_gpuallocdiag(unittest.TestCase):
    def test_matrix(self):
        """Check GpuAllocDiag against numpy.diag for offsets 0, +2, -3."""
        x = tensor.vector()
        np_x = np.arange(7).astype(theano.config.floatX)

        # One compiled function per offset; each must reproduce
        # numpy.diag exactly.
        for offset in (0, 2, -3):
            fn = theano.function([x], GpuAllocDiag(offset)(x),
                                 mode=mode_with_gpu)
            assert np.allclose(fn(np_x), np.diag(np_x, offset))
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论