Commit b0722fe2 authored by: nouiz

Merge pull request #661 from bouchnic/new_ops

Two extra ops from numpy.
import theano
import numpy as np
import basic
class DiffOp(theano.Op):
    """Calculate the n-th order discrete difference along the given axis.

    The first order difference is given by out[i] = a[i + 1] - a[i]
    along the given axis; higher order differences are calculated by
    using diff recursively.  Wrapping of numpy.diff.

    Parameter:
    x -- Input vector.

    Keywords arguments:
    n -- The number of times values are differenced, default is 1.
    axis -- The axis along which the difference is taken, default is
            the last axis.
    """
    def __init__(self, n=1, axis=-1):
        self.n = n
        self.axis = axis

    def __eq__(self, other):
        # Ops compare equal when they have the same type and parameters,
        # so Theano can merge equivalent apply nodes.
        return (type(self) == type(other) and
                self.n == other.n and
                self.axis == other.axis)

    def __hash__(self):
        return hash(type(self)) ^ hash(self.n) ^ hash(self.axis)

    def make_node(self, x):
        x = basic.as_tensor_variable(x)
        # Output has the same type (dtype and broadcastable pattern) as x.
        return theano.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage):
        x = inputs[0]
        z = output_storage[0]
        z[0] = np.diff(x, n=self.n, axis=self.axis)

    def grad(self, inputs, outputs_gradients):
        # Renamed local so the `inputs` parameter is not shadowed.
        x = inputs[0]
        if x.ndim != 1:
            # Fixed missing space between the concatenated string parts.
            raise NotImplementedError("Grad is not implemented for inputs "
                                      "with number of dimension other "
                                      "than 1.")
        z = outputs_gradients[0]

        def _grad_helper(z):
            # Adjoint of one first-order diff on a vector:
            # grad[i] = z[i - 1] - z[i], with zero padding at both ends.
            pre = basic.concatenate([[0.], z])
            app = basic.concatenate([z, [0.]])
            return pre - app

        for k in range(self.n):
            z = _grad_helper(z)
        return [z]

    def infer_shape(self, node, ins_shapes):
        # Each diff application shortens the chosen axis by one,
        # so n applications shorten it by n.
        i0_shapes = ins_shapes[0]
        out_shape = list(i0_shapes)
        out_shape[self.axis] = out_shape[self.axis] - self.n
        return [out_shape]

    def __str__(self):
        return self.__class__.__name__
def diff(x, n=1, axis=-1):
    """Calculate the n-th order discrete difference along the given axis.

    The first order difference is given by out[i] = a[i + 1] - a[i]
    along the given axis; higher order differences are calculated by
    using diff recursively.  Wrapping of numpy.diff.

    Parameter:
    x -- Input vector.

    Keywords arguments:
    n -- The number of times values are differenced, default is 1.
    axis -- The axis along which the difference is taken, default is
            the last axis.
    """
    return DiffOp(n=n, axis=axis)(x)
class BinCountOp(theano.Op):
    """Count number of occurrences of each value in array of non-negative ints.

    The number of bins (of size 1) is one larger than the largest
    value in x. If minlength is specified, there will be at least
    this number of bins in the output array (though it will be longer
    if necessary, depending on the contents of x). Each bin gives the
    number of occurrences of its index value in x. If weights is
    specified the input array is weighted by it, i.e. if a value n
    is found at position i, out[n] += weight[i] instead of out[n] += 1.
    Wrapping of numpy.bincount.

    Parameter:
    x -- 1 dimension, nonnegative ints

    Keywords arguments:
    weights -- Weights, array of the same shape as x.
    minlength -- A minimum number of bins for the output array.
    """
    # Tuple of all dtypes accepted for the x parameter of this op.
    compatible_type = ('int8', 'int16', 'int32', 'int64',
                       'uint8', 'uint16', 'uint32', 'uint64')

    def __init__(self, minlength=None):
        self.minlength = minlength

    def __eq__(self, other):
        return (type(self) == type(other) and
                self.minlength == other.minlength)

    def __hash__(self):
        return hash(type(self)) ^ hash(self.minlength)

    def make_node(self, x, weights):
        x = basic.as_tensor_variable(x)
        if x.dtype not in BinCountOp.compatible_type:
            raise TypeError("Inputs dtype must be an integer.")
        if x.ndim != 1:
            raise TypeError("Inputs must be of dimension 1.")

        if weights is None:
            # No weights: wrap None in a generic constant so the apply
            # node always has two inputs; output keeps x's integer type.
            weights = theano.gof.Constant(theano.gof.Generic(), None)
            out_type = x.type()
        else:
            weights = basic.as_tensor_variable(weights)
            out_type = weights.type()
            if weights.ndim != 1:
                # Fixed missing space between the concatenated string parts.
                raise TypeError("Weights cannot have a number of "
                                "dimension different of 1.")
        return theano.Apply(self, [x, weights], [out_type])

    def perform(self, node, inputs, output_storage):
        x = inputs[0]
        weights = inputs[1]
        z = output_storage[0]
        if weights is not None and weights.shape != x.shape:
            raise TypeError("All inputs must have the same shape.")
        z[0] = np.bincount(x, weights=weights, minlength=self.minlength)

    def grad(self, inputs, outputs_gradients):
        # bincount is not differentiable with respect to its inputs.
        return [None for i in inputs]

    def infer_shape(self, node, ins_shapes):
        # Output length is max(x) + 1, bounded below by minlength when given.
        x = node.inputs[0]
        m = basic.max(x) + 1
        # Use identity comparison with None instead of `!= None`.
        if self.minlength is not None:
            m = basic.maximum(m, self.minlength)
        return [[m]]

    def __str__(self):
        return self.__class__.__name__
def bincount(x, weights=None, minlength=None):
    """Count number of occurrences of each value in array of non-negative ints.

    The number of bins (of size 1) is one larger than the largest
    value in x. If minlength is specified, there will be at least
    this number of bins in the output array (though it will be longer
    if necessary, depending on the contents of x). Each bin gives the
    number of occurrences of its index value in x. If weights is
    specified the input array is weighted by it, i.e. if a value n
    is found at position i, out[n] += weight[i] instead of out[n] += 1.
    Wrapping of numpy.bincount.

    Parameter:
    x -- 1 dimension, nonnegative ints

    Keywords arguments:
    weights -- Weights, array of the same shape as x.
    minlength -- A minimum number of bins for the output array.
    """
    return BinCountOp(minlength=minlength)(x, weights)
class SqueezeOp(theano.Op):
    """Remove single-dimensional entries from the shape of an array.

    It returns the input array, but with all or a subset of the
    dimensions of length 1 removed. This is always x itself or a view
    into x.  Wrapping of numpy.squeeze.

    Parameter:
    x -- Input data, tensor variable.
    out_nd -- Output number of dimension for this op.
    """
    def __init__(self, out_nd):
        self.out_nd = out_nd

    def __eq__(self, other):
        return (type(self) == type(other) and
                self.out_nd == other.out_nd)

    def __hash__(self):
        return hash(type(self)) ^ hash(self.out_nd)

    def make_node(self, x):
        x = basic.as_tensor_variable(x)
        # The squeezed output has exactly out_nd non-broadcastable dims;
        # the actual rank is only verified at runtime in perform().
        out_type = theano.tensor.TensorType(dtype=x.dtype,
                                    broadcastable=[False] * self.out_nd)
        return theano.Apply(self, [x], [out_type()])

    def perform(self, node, inputs, output_storage):
        x = inputs[0]
        z = output_storage[0]
        squeezed = np.squeeze(x)
        if squeezed.ndim != self.out_nd:
            raise TypeError("The number of dimension specified "
                            "is different from the one calculated.")
        z[0] = squeezed

    def grad(self, inputs, outputs_gradients):
        # Squeezing is a reshape, so the gradient is the output gradient
        # reshaped back to the input's shape.
        out = outputs_gradients[0]
        return [out.reshape(inputs[0].shape)]

    def __str__(self):
        return self.__class__.__name__
def squeeze(x, out_nd):
    """Remove single-dimensional entries from the shape of an array.

    It returns the input array, but with all or a subset of the
    dimensions of length 1 removed. This is always x itself or a view
    into x.  Wrapping of numpy.squeeze.

    Parameter:
    x -- Input data, tensor variable.
    out_nd -- Output number of dimension for this op.
    """
    return SqueezeOp(out_nd=out_nd)(x)
class RepeatOp(theano.Op):
    """Repeat elements of an array.

    It returns an array which has the same shape as x, except
    along the given axis. The axis is used to specify along which
    axis to repeat values. By default, use the flattened input
    array, and return a flat output array.

    The number of repetitions for each element is repeats.
    repeats is broadcasted to fit the shape of the given axis.

    Parameter:
    x -- Input data, tensor variable.
    repeats -- int, tensor variable.

    Keywords arguments:
    axis -- int, optional.
    """
    def __init__(self, axis=None):
        self.axis = axis

    def __eq__(self, other):
        return (type(self) == type(other) and
                self.axis == other.axis)

    def __hash__(self):
        return hash(type(self)) ^ hash(self.axis)

    def make_node(self, x, repeats):
        x = basic.as_tensor_variable(x)
        repeats = basic.as_tensor_variable(repeats)
        # Use identity comparison with None instead of `== None`.
        if self.axis is None:
            # Flattened case: the output is always a 1d tensor of x's dtype.
            out_type = theano.tensor.TensorType(dtype=x.dtype,
                                                broadcastable=[False])
        else:
            out_type = x.type
        return theano.Apply(self, [x, repeats], [out_type()])

    def perform(self, node, inputs, output_storage):
        x = inputs[0]
        repeats = inputs[1]
        z = output_storage[0]
        z[0] = np.repeat(x, repeats=repeats, axis=self.axis)

    def grad(self, inputs, outputs_gradients):
        # Gradient is only implemented for a vector input with a
        # scalar repeats: each input element's gradient is the sum of
        # the gradients of its `repeats` copies.
        repeats = inputs[1]
        out = outputs_gradients[0]
        if inputs[0].ndim != 1:
            raise NotImplementedError()
        if repeats.ndim != 0:
            raise NotImplementedError()
        return [out.reshape([inputs[0].shape[0], repeats]).sum(axis=1), None]

    def infer_shape(self, node, ins_shapes):
        i0_shapes = ins_shapes[0]
        repeats = node.inputs[1]
        out_shape = list(i0_shapes)

        if self.axis is None:
            # Bug fix: the flattened output length is the total number
            # of elements (PRODUCT of the dims) times repeats; the
            # previous code summed the dims instead.
            res = 1
            for d in i0_shapes:
                res = res * d
            out_shape = (res * repeats, )
        else:
            if repeats.ndim == 0:
                # Scalar repeats: the axis simply grows by that factor.
                out_shape[self.axis] = out_shape[self.axis] * repeats
            else:
                # Vector repeats: the axis length is the sum of all repeats.
                out_shape[self.axis] = theano.tensor.sum(repeats)
        return [out_shape]

    def __str__(self):
        return self.__class__.__name__
def repeat(x, repeats, axis=None):
    """Repeat elements of an array.

    It returns an array which has the same shape as x, except
    along the given axis. The axis is used to speficy along which
    axis to repeat values. By default, use the flattened input
    array, and return a flat output array.

    The number of repetitions for each element is repeat.
    repeats is broadcasted to fit the shape of the given axis.

    Parameter:
    x -- Input data, tensor variable.
    repeats -- int, tensor variable.

    Keywords arguments:
    axis -- int, optional.
    """
    op = RepeatOp(axis=axis)
    return op(x, repeats)
import theano
import numpy as np
from theano import tensor as T
from theano.tests import unittest_tools as utt
from theano.tensor.extra_ops import *
class TestBinCountOp(utt.InferShapeTester):
    """Tests for BinCountOp: values against numpy and shape inference."""

    def setUp(self):
        super(TestBinCountOp, self).setUp()
        self.op_class = BinCountOp
        self.op = BinCountOp()

    def test_bincountOp(self):
        # Compare the op's output against numpy.bincount for the
        # unweighted, weighted, and minlength variants.
        x = T.lvector('x')
        w = T.dvector('w')
        a = np.random.random_integers(50, size=(25))
        weights = np.random.random((25,))

        f1 = theano.function([x], bincount(x))
        f2 = theano.function([x, w], bincount(x, weights=w))
        f3 = theano.function([x], bincount(x, minlength=23))
        f4 = theano.function([x], bincount(x, minlength=5))

        assert (np.bincount(a) == f1(a)).all()
        assert np.allclose(np.bincount(a, weights=weights), f2(a, weights))
        assert (np.bincount(a, minlength=23) == f3(a)).all()
        # Bug fix: this assertion previously called f3 again, so the
        # minlength=5 function (f4) was compiled but never exercised.
        assert (np.bincount(a, minlength=5) == f4(a)).all()

    def test_infer_shape(self):
        x = T.lvector('x')
        self._compile_and_check([x],
                                [bincount(x)],
                                [np.random.random_integers(50, size=(25,))],
                                self.op_class)

        weights = np.random.random((25,))
        self._compile_and_check([x],
                                [bincount(x, weights=weights)],
                                [np.random.random_integers(50, size=(25,))],
                                self.op_class)

        # minlength larger than any value in x dominates the output shape.
        self._compile_and_check([x],
                                [bincount(x, minlength=60)],
                                [np.random.random_integers(50, size=(25,))],
                                self.op_class)

        # minlength smaller than max(x): shape is driven by the data.
        self._compile_and_check([x],
                                [bincount(x, minlength=5)],
                                [np.random.random_integers(50, size=(25,))],
                                self.op_class)
class TestDiffOp(utt.InferShapeTester):
    """Tests for DiffOp: values, shape inference, and gradients."""

    # Number of difference orders (values of n) exercised by each test.
    nb = 10

    def setUp(self):
        super(TestDiffOp, self).setUp()
        self.op_class = DiffOp
        self.op = DiffOp()

    def test_diffOp(self):
        # Check the op's output against numpy.diff for every axis and
        # for each difference order up to nb.
        x = T.dmatrix('x')
        a = np.random.random((30, 50))

        f = theano.function([x], diff(x))
        assert np.allclose(np.diff(a), f(a))

        for ax in range(len(a.shape)):
            for order in range(TestDiffOp.nb):
                g = theano.function([x], diff(x, n=order, axis=ax))
                assert np.allclose(np.diff(a, n=order, axis=ax), g(a))

    def test_infer_shape(self):
        x = T.dmatrix('x')
        a = np.random.random((30, 50))

        self._compile_and_check([x], [self.op(x)], [a], self.op_class)

        for ax in range(len(a.shape)):
            for order in range(TestDiffOp.nb):
                self._compile_and_check([x],
                                        [diff(x, n=order, axis=ax)],
                                        [a],
                                        self.op_class)

    def test_grad(self):
        x = T.vector('x')
        a = np.random.random(500)

        # Compiling the gradient is itself a smoke test before verifying
        # its numerical value.
        gf = theano.function([x], T.grad(T.sum(diff(x)), x))
        utt.verify_grad(self.op, [a])

        for order in range(TestDiffOp.nb):
            dg = theano.function([x], T.grad(T.sum(diff(x, n=order)), x))
            utt.verify_grad(DiffOp(n=order), [a])
class TestSqueezeOp(utt.InferShapeTester):
    """Tests for SqueezeOp: values against numpy.squeeze and gradients."""

    def setUp(self):
        super(TestSqueezeOp, self).setUp()
        self.op_class = SqueezeOp
        self.op = SqueezeOp(out_nd=1)

    def test_squeezeOp(self):
        # Matrix with one broadcastable dim squeezed down to a vector.
        x = T.dmatrix('x')
        a = np.random.random((1, 50))
        f = theano.function([x], squeeze(x, out_nd=1))
        assert np.allclose(np.squeeze(a), f(a))

        # 4d tensors with singleton dims in various positions, all of
        # which squeeze down to exactly two dimensions.
        x = T.dtensor4('x')
        f = theano.function([x], squeeze(x, out_nd=2))
        for shape in [(1, 1, 2, 3), (1, 2, 2, 1), (4, 1, 2, 1)]:
            a = np.random.random(shape)
            assert np.allclose(np.squeeze(a), f(a))

    def test_grad(self):
        x = T.dtensor4('x')
        a = np.random.random((1, 1, 3, 4))

        # Compiling the gradient is a smoke test before verifying it.
        gf = theano.function([x], T.grad(T.sum(squeeze(x, out_nd=1)), x))
        utt.verify_grad(SqueezeOp(out_nd=2), [a])
class TestRepeatOp(utt.InferShapeTester):
    """Tests for RepeatOp: values, shape inference, and gradients."""

    # Number of repeat counts exercised by the value test.
    nb = 5

    def setUp(self):
        super(TestRepeatOp, self).setUp()
        self.op_class = RepeatOp
        self.op = RepeatOp()

    def test_repeatOp(self):
        x = T.dmatrix('x')
        a = np.random.random((30, 50))
        # Wrap range() in list() so the concatenation with [None] also
        # works on Python 3, where range() is not a list.
        for axis in [None] + list(range(len(a.shape))):
            for repeats in range(TestRepeatOp.nb):
                f = theano.function([x], repeat(x, repeats, axis=axis))
                assert np.allclose(np.repeat(a, repeats, axis=axis), f(a))

    def test_infer_shape(self):
        x = T.dvector('x')
        # NOTE(review): iscalars with one name — presumably returns a
        # single variable here; confirm against theano.tensor docs.
        m = T.iscalars('m')
        a = np.random.random(50)
        self._compile_and_check([x, m],
                                [repeat(x, m)],
                                [a, 2],
                                self.op_class)

        x = T.dmatrix('x')
        a = np.random.random((40, 50))
        for axis in range(len(a.shape)):
            self._compile_and_check([x, m],
                                    [repeat(x, m, axis=axis)],
                                    [a, 2],
                                    self.op_class)

        # Vector of per-row repeat counts along axis 0.
        m = T.lvector('m')
        repeats = np.random.random_integers(5, size=(40, ))
        self._compile_and_check([x, m],
                                [repeat(x, m, axis=0)],
                                [a, repeats],
                                self.op_class)

    def test_grad(self):
        x = T.dvector('x')
        a = np.random.random(50)

        # Compiling the gradient is a smoke test before verifying it.
        gf = theano.function([x], T.grad(T.sum(repeat(x, 3)), x))

        def repeat_(a):
            return RepeatOp()(a, 3)

        utt.verify_grad(repeat_, [a])
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment