Commit 4f524015, authored by nouiz

Merge pull request #723 from bouchnic/extra_ops

Add grad implementation for repeat for scalar.
......@@ -259,21 +259,18 @@ def squeeze(x, out_nd):
class RepeatOp(theano.Op):
    """Repeat elements of an array.

    It returns an array which has the same shape as `x`, except
    along the given axis.  The axis is used to specify along which
    axis to repeat values.  By default, use the flattened input
    array, and return a flat output array.

    The number of repetitions for each element is `repeat`.
    `repeats` is broadcasted to fit the length of the given `axis`.

    :param x: Input data, tensor variable.
    :param repeats: int, scalar or tensor variable.
    :param axis: int, optional.
    """
def __init__(self, axis=None):
......@@ -302,14 +299,27 @@ class RepeatOp(theano.Op):
z = output_storage[0]
z[0] = np.repeat(x, repeats=repeats, axis=self.axis)
def grad(self, inputs, outputs_gradients):
    """Gradient of RepeatOp w.r.t. its inputs.

    :param inputs: (x, repeats) — the op's symbolic inputs.
    :param outputs_gradients: one-element list with the gradient `gz`
        flowing into the op's single output.
    :returns: [grad wrt x, None] — `repeats` is integer-valued, so its
        gradient is undefined (None).
    :raises NotImplementedError: when `repeats` is a vector (would need
        the concrete repeat counts to split `gz` correctly).
    :raises ValueError: when `repeats` has more than one dimension.
    """
    # Unpack explicitly instead of the Python-2-only tuple-parameter
    # syntax `def grad(self, (x, repeats), (gz, ))` (removed by PEP 3113);
    # callers still pass the same two positional arguments.
    x, repeats = inputs
    (gz,) = outputs_gradients
    if repeats.ndim == 0:
        # Scalar repeats: the repeated axis of gz is `repeats` times
        # longer than x's.  Reshape gz so the repetition factor becomes
        # its own axis, then sum that axis out to recover x's shape.
        if self.axis is None:
            # Flattened repeat: repetitions are contiguous, so the
            # inserted axis goes last.
            axis = x.ndim
        else:
            if self.axis >= 0:
                axis = self.axis + 1
            else:
                axis = self.axis + x.ndim + 1
        shape = [x.shape[k] for k in range(x.ndim)]
        shape.insert(axis, repeats)
        return [gz.reshape(shape, x.ndim + 1).sum(axis=axis), None]
    elif repeats.ndim == 1:
        # For this implementation, we would need to know the concrete
        # values of `repeats` in order to split gz in the right way to
        # sum the good part.
        raise NotImplementedError()
    else:
        raise ValueError()
# NOTE(review): this span is a raw diff fragment — old and new lines of
# infer_shape are interleaved and the function is cut by hunk markers,
# so only comments are added here.
def infer_shape(self, node, ins_shapes):
# Shape of the first input `x`; `repeats` is presumably bound in the
# lines hidden by the hunk marker below — TODO confirm.
i0_shapes = ins_shapes[0]
......@@ -317,10 +327,13 @@ class RepeatOp(theano.Op):
out_shape = list(i0_shapes)
if self.axis == None:
# OLD version (removed by the diff): summed the dimensions — a bug,
# since a flattened array has prod(shape) elements, not sum(shape).
res = 0
for d in i0_shapes:
res = res + d
out_shape = (res * repeats, )
# NEW version (added by the diff): handles 0-d input and takes the
# product of the dimensions instead of the sum.
if len(i0_shapes) == 0:
out_shape = [repeats]
else:
res = 1
for d in i0_shapes:
res = res * d
out_shape = (res * repeats, )
else:
if repeats.ndim == 0:
# Scalar repeats along a given axis just scales that axis.
# NOTE(review): the rest of this branch (vector repeats, return)
# lies beyond the next hunk marker and is not visible here.
out_shape[self.axis] = out_shape[self.axis] * repeats
......@@ -335,21 +348,18 @@ class RepeatOp(theano.Op):
def repeat(x, repeats, axis=None):
    """Repeat elements of an array.

    It returns an array which has the same shape as `x`, except
    along the given axis.  The axis is used to specify along which
    axis to repeat values.  By default, use the flattened input
    array, and return a flat output array.

    The number of repetitions for each element is `repeat`.
    `repeats` is broadcasted to fit the length of the given `axis`.

    :param x: Input data, tensor variable.
    :param repeats: int, scalar or tensor variable.
    :param axis: int, optional.
    """
    return RepeatOp(axis=axis)(x, repeats)
......
......@@ -137,56 +137,60 @@ class TestSqueezeOp(utt.InferShapeTester):
class TestRepeatOp(utt.InferShapeTester):
    # NOTE(review): reconstructed from a diff fragment where old and new
    # test bodies were interleaved; this is the new (post-commit) side.

    def setUp(self):
        super(TestRepeatOp, self).setUp()
        self.op_class = RepeatOp
        self.op = RepeatOp()

    def test_repeatOp(self):
        # Check RepeatOp against np.repeat for 0-d, 1-d and 2-d inputs,
        # with both scalar and vector `repeats`.
        for ndim in range(3):
            x = T.TensorType(theano.config.floatX, [False] * ndim)()
            a = np.random.random((10, ) * ndim)

            # Scalar repeats, flattened and per-axis.
            r_var = T.lscalar()
            r = 3
            for axis in [None] + list(range(ndim)):
                f = theano.function([x, r_var], repeat(x, r_var, axis=axis))
                assert np.allclose(np.repeat(a, r, axis=axis), f(a, r))

            # Vector repeats (one count per element along the axis).
            r_var = T.lvector()
            r = np.random.random_integers(5, size=(10,))
            for axis in range(ndim):
                f = theano.function([x, r_var], repeat(x, r_var, axis=axis))
                assert np.allclose(np.repeat(a, r, axis=axis), f(a, r))

    def test_infer_shape(self):
        # infer_shape must agree with the actual output shape for scalar
        # and vector repeats, over 0-d through 3-d inputs.
        for ndim in range(4):
            x = T.TensorType(theano.config.floatX, [False] * ndim)()
            a = np.random.random((10, ) * ndim)

            r_var = T.lscalar()
            r = 3
            for axis in [None] + list(range(ndim)):
                self._compile_and_check([x, r_var],
                                        [RepeatOp(axis=axis)(x, r_var)],
                                        [a, r],
                                        self.op_class)

            r_var = T.lvector()
            r = np.random.random_integers(5, size=(10,))
            for axis in range(ndim):
                self._compile_and_check([x, r_var],
                                        [RepeatOp(axis=axis)(x, r_var)],
                                        [a, r],
                                        self.op_class)

    def test_grad(self):
        # Gradient is only implemented for scalar repeats; exercise every
        # axis, including a negative one.
        for ndim in range(3):
            a = np.random.random((10, ) * ndim)
            for axis in [None] + list(range(ndim)):
                utt.verify_grad(lambda x: RepeatOp(axis=axis)(x, 3), [a])
            if ndim > 0:
                utt.verify_grad(lambda x: RepeatOp(axis=-1)(x, 3), [a])
class TestBartlett(utt.InferShapeTester):
......@@ -278,6 +282,7 @@ class TestFillDiagonal(utt.InferShapeTester):
self.op_class)
if __name__ == "__main__":
# NOTE(review): the diff shows both the old entry point
# (utt.unittest.main()) and its apparent replacement (a manual debug
# run of TestFillDiagonal) — confirm which one is current before
# relying on this block; the debug run looks like leftover code.
utt.unittest.main()
t = TestFillDiagonal('setUp')
t.setUp()
t.test_perform()
......
Markdown is supported
0%
You are about to add 0 people to this discussion. Please proceed with caution.
Finish editing this comment first!
Register or sign in to comment