Merge pull request #723 from bouchnic/extra_ops

Add grad implementation for repeat when `repeats` is a scalar.

Merge pull request #723 from bouchnic/extra_ops
4f524015 · nouiz · 15f2a18f · ba483b3f · 4f524015 · 4f524015
--- a/theano/tensor/extra_ops.py
+++ b/theano/tensor/extra_ops.py
@@ -259,21 +259,18 @@ def squeeze(x, out_nd):
 class RepeatOp(theano.Op):
    """Repeat elements of an array.
-    It returns an array which has the same shape as x, except
+    It returns an array which has the same shape as `x`, except
    along the given axis. The axis is used to specify along which
    axis to repeat values. By default, use the flattened input
    array, and return a flat output array.
-    The number of repetitions for each element is repeat.
+    The number of repetitions for each element is `repeats`.
-    repeats is broadcasted to fit the shape of the given axis.
+    `repeats` is broadcasted to fit the length of the given `axis`.
-    Parameter:
+    :param x: Input data, tensor variable.
-    x -- Input data, tensor variable.
+    :param repeats: int, scalar or tensor variable.
-    repeats -- int, tensor variable.
-    Keywords arguments:
-    axis -- int, optional.
+    :param axis: int, optional.
    """
    def __init__(self, axis=None):
@@ -302,14 +299,27 @@ class RepeatOp(theano.Op):
        z = output_storage[0]
        z[0] = np.repeat(x, repeats=repeats, axis=self.axis)
-    def grad(self, inputs, outputs_gradients):
+    def grad(self, (x, repeats), (gz, )):
-        repeats = inputs[1]
+        if repeats.ndim == 0:
-        out = outputs_gradients[0]
+            if self.axis is None:
-        if inputs[0].ndim != 1:
+                axis = x.ndim
-            raise NotImplementedError()
+            else:
-        if repeats.ndim != 0:
+                if self.axis >= 0:
+                    axis = self.axis + 1
+                else:
+                    axis = self.axis + x.ndim + 1
+            shape = [x.shape[k] for k in range(x.ndim)]
+            shape.insert(axis, repeats)
+            return [gz.reshape(shape, x.ndim + 1).sum(axis=axis), None]
+        elif repeats.ndim == 1:
+            # For this implementation, we would need to specify the length
+            # of repeats in order to split gz in the right way to sum
+            # the good part.
            raise NotImplementedError()
-        return [out.reshape([inputs[0].shape[0], repeats]).sum(axis=1), None]
+        else:
+            raise ValueError()
    def infer_shape(self, node, ins_shapes):
        i0_shapes = ins_shapes[0]
@@ -317,9 +327,12 @@ class RepeatOp(theano.Op):
        out_shape = list(i0_shapes)
        if self.axis == None:
-            res = 0
+            if len(i0_shapes) == 0:
+                out_shape = [repeats]
+            else:
+                res = 1
                for d in i0_shapes:
-                res = res + d
+                    res = res * d
                out_shape = (res * repeats, )
        else:
            if repeats.ndim == 0:
@@ -335,21 +348,18 @@ class RepeatOp(theano.Op):
 def repeat(x, repeats, axis=None):
    """Repeat elements of an array.
-    It returns an array which has the same shape as x, except
+    It returns an array which has the same shape as `x`, except
    along the given axis. The axis is used to specify along which
    axis to repeat values. By default, use the flattened input
    array, and return a flat output array.
-    The number of repetitions for each element is repeat.
+    The number of repetitions for each element is `repeats`.
-    repeats is broadcasted to fit the shape of the given axis.
+    `repeats` is broadcasted to fit the length of the given `axis`.
-    Parameter:
+    :param x: Input data, tensor variable.
-    x -- Input data, tensor variable.
+    :param repeats: int, scalar or tensor variable.
-    repeats -- int, tensor variable.
-    Keywords arguments:
-    axis -- int, optional.
+    :param axis: int, optional.
    """
    return RepeatOp(axis=axis)(x, repeats)

--- a/theano/tensor/tests/test_extra_ops.py
+++ b/theano/tensor/tests/test_extra_ops.py
@@ -137,56 +137,60 @@ class TestSqueezeOp(utt.InferShapeTester):
 class TestRepeatOp(utt.InferShapeTester):
-    nb = 5
    def setUp(self):
        super(TestRepeatOp, self).setUp()
        self.op_class = RepeatOp
        self.op = RepeatOp()
    def test_repeatOp(self):
-        x = T.dmatrix('x')
+        for ndim in range(3):
-        a = np.random.random((30, 50))
+            x = T.TensorType(theano.config.floatX, [False] * ndim)()
+            a = np.random.random((10, ) * ndim)
-        for axis in [None] + range(len(a.shape)):
+            r_var = T.lscalar()
-            for repeats in range(TestRepeatOp.nb):
+            r = 3
-                f = theano.function([x], repeat(x, repeats, axis=axis))
+            for axis in [None] + range(ndim):
-                assert np.allclose(np.repeat(a, repeats, axis=axis), f(a))
+                f = theano.function([x, r_var], repeat(x, r_var, axis=axis))
+                assert np.allclose(np.repeat(a, r, axis=axis), f(a, r))
-    def test_infer_shape(self):
+            r_var = T.lvector()
-        x = T.dvector('x')
+            r = np.random.random_integers(5, size=(10,))
-        m = T.iscalars('m')
-        a = np.random.random(50)
-        self._compile_and_check([x, m],
+            for axis in range(ndim):
-                                [repeat(x, m)],
+                f = theano.function([x, r_var], repeat(x, r_var, axis=axis))
-                                [a, 2],
+                assert np.allclose(np.repeat(a, r, axis=axis), f(a, r))
-                                self.op_class)
-        x = T.dmatrix('x')
-        a = np.random.random((40, 50))
+    def test_infer_shape(self):
-        for axis in range(len(a.shape)):
+        for ndim in range(4):
-            self._compile_and_check([x, m],
+            x = T.TensorType(theano.config.floatX, [False] * ndim)()
-                                    [repeat(x, m, axis=axis)],
+            a = np.random.random((10, ) * ndim)
-                                    [a, 2],
+            r_var = T.lscalar()
+            r = 3
+            for axis in [None] + range(ndim):
+                self._compile_and_check([x, r_var],
+                                        [RepeatOp(axis=axis)(x, r_var)],
+                                        [a, r],
                                        self.op_class)
-        m = T.lvector('m')
+            r_var = T.lvector()
-        repeats = np.random.random_integers(5, size=(40, ))
+            r = np.random.random_integers(5, size=(10,))
-        self._compile_and_check([x, m],
-                                [repeat(x, m, axis=0)],
+            for axis in range(ndim):
-                                [a, repeats],
+                self._compile_and_check([x, r_var],
+                                        [RepeatOp(axis=axis)(x, r_var)],
+                                        [a, r],
                                        self.op_class)
    def test_grad(self):
-        x = T.dvector('x')
+        for ndim in range(3):
-        a = np.random.random(50)
+            a = np.random.random((10, ) * ndim)
-        gf = theano.function([x], T.grad(T.sum(repeat(x, 3)), x))
-        def repeat_(a):
+            for axis in [None] + range(ndim):
-            return RepeatOp()(a, 3)
+                utt.verify_grad(lambda x: RepeatOp(axis=axis)(x, 3), [a])
-        utt.verify_grad(repeat_, [a])
+            if ndim > 0:
+                utt.verify_grad(lambda x: RepeatOp(axis=-1)(x, 3), [a])
 class TestBartlett(utt.InferShapeTester):
@@ -278,6 +282,7 @@ class TestFillDiagonal(utt.InferShapeTester):
                                self.op_class)
 if __name__ == "__main__":
+    utt.unittest.main()
    t = TestFillDiagonal('setUp')
    t.setUp()
    t.test_perform()