amendment

f4c355f4 · hantek · cd021d10 · f4c355f4 · f4c355f4 · f4c355f4
--- a/theano/gpuarray/subtensor.py
+++ b/theano/gpuarray/subtensor.py
@@ -1080,7 +1080,7 @@ __device__ ga_half atomicExch(ga_half *addr, ga_half val) {
        """ % locals()
-class GpuDiagonal(Subtensor):
+class GpuExtractDiag(Subtensor):
    __props__ = ("offset", "axis1", "axis2", "view")
    def __init__(self, offset=0, axis1=0, axis2=1, view=False):
@@ -1185,7 +1185,7 @@ class GpuDiagonal(Subtensor):
        return [tuple(out_shape)]
-class GpuAllocDiag(Subtensor):
+class GpuAllocDiag(Op):
    __props__ = ("offset",)
    def __init__(self, offset=0):
@@ -1206,20 +1206,20 @@ class GpuAllocDiag(Subtensor):
        (z,) = outputs
        dim = x.shape[0] + abs(self.offset)
-        z = gpuarray.zeros((dim, dim), dtype=x.dtype, context=x.context)
+        z[0] = gpuarray.zeros((dim, dim), dtype=x.dtype, context=x.context)
        if self.offset <= 0:  # diag in the lower triangle
-            diag_z = z[-self.offset, :(dim + self.offset)]
+            diag_z = z[0][-self.offset, :(dim + self.offset)]
        else:  # diag in the upper triangle
-            diag_z = z[:(dim - self.offset), self.offset]
+            diag_z = z[0][:(dim - self.offset), self.offset]
-        diag_z.strides = (sum(z.strides),)
+        diag_z.strides = (sum(z[0].strides),)
        diag_z[:] = x[:]
    def grad(self, inputs, gout):
-        (input_x,) = inputs
+        (gz,) = gout
-        return [grad_not_implemented(self, 0, input_x)]
+        return [GpuExtractDiag(offset=self.offset, axis1=0, axis2=1, view=False)(gz)]
    def infer_shape(self, node, shapes):
        dim = shapes[0][0] + abs(self.offset)
-        return [dim, dim]
+        return [[dim, dim]]
--- a/theano/gpuarray/tests/test_subtensor.py
+++ b/theano/gpuarray/tests/test_subtensor.py
@@ -14,7 +14,7 @@ from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
                         GpuAdvancedSubtensor,
                         GpuAdvancedIncSubtensor1,
                         GpuAdvancedIncSubtensor1_dev20,
-                         GpuDiagonal,
+                         GpuExtractDiag,
                         GpuAllocDiag)
 from ..type import gpuarray_shared_constructor
@@ -181,11 +181,11 @@ class test_gpudiagonal(unittest.TestCase):
    def test_matrix(self):
        x = tensor.matrix()
        np_x = np.arange(77).reshape(7, 11).astype(theano.config.floatX)
-        fn = theano.function([x], GpuDiagonal()(x), mode=mode_with_gpu)
+        fn = theano.function([x], GpuExtractDiag()(x), mode=mode_with_gpu)
        assert np.allclose(fn(np_x), np_x.diagonal())
-        fn = theano.function([x], GpuDiagonal(2)(x), mode=mode_with_gpu)
+        fn = theano.function([x], GpuExtractDiag(2)(x), mode=mode_with_gpu)
        assert np.allclose(fn(np_x), np_x.diagonal(2))
-        fn = theano.function([x], GpuDiagonal(-3)(x), mode=mode_with_gpu)
+        fn = theano.function([x], GpuExtractDiag(-3)(x), mode=mode_with_gpu)
        assert np.allclose(fn(np_x), np_x.diagonal(-3))
    def test_tensor(self):
@@ -196,7 +196,7 @@ class test_gpudiagonal(unittest.TestCase):
                (-3, 1, 0), (-2, 2, 0), (3, 3, 0), (-1, 3, 2),
                (2, 2, 3), (-1, 2, 1), (1, 3, 1), (-1, 1, 3)]:
            assert np.allclose(
-                GpuDiagonal(offset, axis1, axis2)(x).eval({x: np_x}),
+                GpuExtractDiag(offset, axis1, axis2)(x).eval({x: np_x}),
                np_x.diagonal(offset, axis1, axis2))
@@ -210,3 +210,11 @@ class test_gpuallocdiag(unittest.TestCase):
        assert np.allclose(fn(np_x), np.diag(np_x, 2))
        fn = theano.function([x], GpuAllocDiag(-3)(x), mode=mode_with_gpu)
        assert np.allclose(fn(np_x), np.diag(np_x, -3))
+    def test_grad(self):
+        x = tensor.vector()
+        np_x = np.arange(7).astype(theano.config.floatX)
+        cost = tensor.sum(GpuAllocDiag()(x))
+        fn = theano.function([x], tensor.grad(cost, x), mode=mode_with_gpu)
+        # Each entry of x appears exactly once in the sum, so d(sum)/dx == 1.
+        assert np.allclose(fn(np_x), np.ones_like(np_x))
--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -6167,8 +6167,6 @@ class ExtractDiag(Op):
 def diagonal(a, offset=0, axis1=0, axis2=1):
-    if (offset, axis1, axis2) == (0, 0, 1):
-        return theano.tensor.nlinalg.extract_diag(a)
    return ExtractDiag(offset, axis1, axis2)(a)
@@ -6177,7 +6175,6 @@ class AllocDiag(Op):
    An op that copies a vector to the diagonal of an empty matrix. It does the
    inverse of ExtractDiag.
-    __props__ = ()
    Usage: T.AllocDiag()(x)
    `x` should be a tensor vector. The parenthesis in the front should indicate
@@ -6210,9 +6207,6 @@ class AllocDiag(Op):
    def make_node(self, diag):
        diag = as_tensor_variable(diag)
-        if diag.type.ndim != 1:
-            raise TypeError('data argument must be a vector', diag.type)
        return Apply(self, [diag], [matrix(dtype=diag.dtype)])
    def perform(self, node, inputs, outputs):
@@ -6221,17 +6215,11 @@ class AllocDiag(Op):
    def grad(self, inputs, gout):
        (gz,) = gout
-        if self.has_default_props():
+        return [diagonal(gz, offset=self.offset, axis1=0, axis2=1)]
-            return [diagonal(gz)]
-        else:
-            return [grad_not_implemented(self, 0, inputs[0])]
    def infer_shape(self, nodes, shapes):
-        return [(shapes[0][0],) * 2]
+        return [(shapes[0][0] + abs(self.offset),) * 2]  # side: n + |offset|
-    def has_default_props(self):
-        return self.offset == self.default_offset
 def diag(v, k=0):
    """
@@ -6255,10 +6243,9 @@ def diag(v, k=0):
    """
    if v.ndim == 1:
-        assert k == 0, "diagonals other than main are not implemented"
-        return AllocDiag()(v)
+        return AllocDiag(k)(v)  # forward k so off-main diagonals are built
    elif v.ndim >= 2:
-        return diagonal(v, k)
+        return diagonal(v, offset=k)
    else:
        raise ValueError("Input must has v.dim >= 1.")