提交 cd021d10 authored 作者: hantek's avatar hantek

implement AllocDiag with offsets

上级 86c150d8
...@@ -21,6 +21,7 @@ from .type import GpuArrayType, gpu_context_type ...@@ -21,6 +21,7 @@ from .type import GpuArrayType, gpu_context_type
from .basic_ops import (as_gpuarray_variable, HideC, GpuKernelBase, Kernel, from .basic_ops import (as_gpuarray_variable, HideC, GpuKernelBase, Kernel,
infer_context_name, gpu_contiguous) infer_context_name, gpu_contiguous)
import pdb
iadd_reg = {} iadd_reg = {}
...@@ -1187,7 +1188,7 @@ class GpuDiagonal(Subtensor): ...@@ -1187,7 +1188,7 @@ class GpuDiagonal(Subtensor):
class GpuAllocDiag(Subtensor):
    """Allocate a square GPU matrix with vector `x` on its k-th diagonal.

    GPU counterpart of ``theano.tensor.AllocDiag``: for a length-n input
    vector and diagonal ``offset`` k, the output is an
    (n + |k|) x (n + |k|) zero matrix whose k-th diagonal holds the
    vector's values — the same convention as ``numpy.diag``.
    """

    __props__ = ("offset",)

    def __init__(self, offset=0):
        # offset > 0: diagonal above the main one (upper triangle);
        # offset < 0: diagonal below it (lower triangle).
        self.offset = offset

    def make_node(self, _x):
        # NOTE(review): the first lines were collapsed in the diff view;
        # this context-name/conversion preamble follows the standard
        # gpuarray Op pattern — confirm against the full file.
        ctx_name = infer_context_name(_x)
        x = as_gpuarray_variable(_x, ctx_name)
        if x.ndim != 1:
            raise ValueError('AllocDiag argument must be a vector!', x)
        # The output is always a full (non-broadcastable) matrix, never
        # the input vector's 1-D broadcastable pattern.
        return gof.Apply(self, [x], [x.type.__class__(
            dtype=x.dtype, broadcastable=(False, False))()])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (z,) = outputs
        # Output is square; |offset| extra rows/cols make room for an
        # off-main diagonal.
        dim = x.shape[0] + abs(self.offset)
        result = gpuarray.zeros((dim, dim), dtype=x.dtype,
                                context=x.context)
        if self.offset <= 0:  # diag in the lower triangle
            # Row -offset, first (dim + offset) == len(x) elements.
            diag_view = result[-self.offset, :(dim + self.offset)]
        else:  # diag in the upper triangle
            # Column offset, first (dim - offset) == len(x) elements.
            diag_view = result[:(dim - self.offset), self.offset]
        # Re-striding the 1-D view by (row stride + col stride) turns it
        # into a view over the k-th diagonal, which we fill from x.
        diag_view.strides = (sum(result.strides),)
        diag_view[:] = x[:]
        # Store into the output cell; rebinding the local name would
        # silently discard the result.
        z[0] = result

    def grad(self, inputs, gout):
        (input_x,) = inputs
        return [grad_not_implemented(self, 0, input_x)]

    def infer_shape(self, node, shapes):
        dim = shapes[0][0] + abs(self.offset)
        # Single output -> a list containing one 2-tuple shape.
        return [(dim, dim)]
...@@ -20,6 +20,8 @@ from ..type import gpuarray_shared_constructor ...@@ -20,6 +20,8 @@ from ..type import gpuarray_shared_constructor
from .config import mode_with_gpu from .config import mode_with_gpu
import pdb
class G_subtensor(test_subtensor.T_subtensor): class G_subtensor(test_subtensor.T_subtensor):
def shortDescription(self): def shortDescription(self):
...@@ -200,11 +202,11 @@ class test_gpudiagonal(unittest.TestCase): ...@@ -200,11 +202,11 @@ class test_gpudiagonal(unittest.TestCase):
class test_gpuallocdiag(unittest.TestCase):
    def test_matrix(self):
        """Check GpuAllocDiag against numpy.diag for offsets 0, +2, -3."""
        x = tensor.vector()
        np_x = np.arange(7).astype(theano.config.floatX)

        # One compiled function per offset; each must reproduce
        # numpy.diag exactly.
        for offset in (0, 2, -3):
            fn = theano.function([x], GpuAllocDiag(offset)(x),
                                 mode=mode_with_gpu)
            assert np.allclose(fn(np_x), np.diag(np_x, offset))
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论