Commit ef088251, authored by Nicolas Bouchard

Merge the files extra_ops/BinCountOp.py and extra_ops/DiffOp.py into extra_ops.py.

Split the tests from the op implementations: move the tests to tests/test_extra_ops.py and create extra_ops/__init__.py.
Parent commit: 1f244aa4
# TODO implement grad for higher dimension
import theano
import numpy as np
from theano import tensor as T
from theano.tests import unittest_tools as utt
class DiffOp(theano.Op):
    """Compute the n-th order discrete difference of a vector.

    The first-order difference is out[i] = a[i + 1] - a[i]; higher
    orders apply the operation recursively.  Wraps numpy.diff for the
    one-dimensional case.

    Parameter:
    x -- input vector.

    Keyword arguments:
    n -- how many times the values are differenced (default 1).
    """

    def __init__(self, n=1):
        # Number of recursive differencing passes.
        self.n = n

    def __eq__(self, other):
        same_type = type(self) == type(other)
        return same_type and self.n == other.n

    def __hash__(self):
        return hash(type(self)) ^ hash(self.n)

    def make_node(self, x):
        x = T.as_tensor_variable(x)
        return theano.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage):
        (x,) = inputs
        out = output_storage[0]
        out[0] = np.diff(x, self.n)

    def grad(self, inputs, outputs_gradients):
        g = outputs_gradients[0]

        def one_step(v):
            # Adjoint of one differencing pass: a zero prepended to v
            # minus a zero appended to v.
            padded_front = T.concatenate([[0.], v])
            padded_back = T.concatenate([v, [0.]])
            return padded_front - padded_back

        # Apply the adjoint once per differencing pass.
        for _ in range(self.n):
            g = one_step(g)
        return [g]

    def infer_shape(self, node, ins_shapes):
        shape = list(ins_shapes[0])
        # Each differencing pass shortens the vector by one element.
        shape[0] = shape[0] - self.n
        return [shape]

    def __str__(self):
        return self.__class__.__name__
def diff(x, n=1):
    """Return the n-th order discrete difference of the vector x."""
    op = DiffOp(n=n)
    return op(x)
class TestDiffOp(utt.InferShapeTester):
    # How many differencing orders n to exercise in each test.
    nb = 10

    def setUp(self):
        super(TestDiffOp, self).setUp()
        self.op_class = DiffOp
        self.op = DiffOp()

    def test_diffOp(self):
        x = T.dvector('x')
        a = np.random.random(500)

        f = theano.function([x], diff(x))
        assert np.allclose(np.diff(a), f(a))

        # Sweep every differencing order up to nb.
        for order in range(TestDiffOp.nb):
            g = theano.function([x], diff(x, n=order))
            assert np.allclose(np.diff(a, n=order), g(a))

    def test_infer_shape(self):
        x = T.dvector('x')
        self._compile_and_check([x], [self.op(x)],
                                [np.random.random(500)],
                                self.op_class)
        for order in range(TestDiffOp.nb):
            self._compile_and_check([x], [DiffOp(n=order)(x)],
                                    [np.random.random(500)],
                                    self.op_class)

    def test_grad(self):
        x = T.vector('x')
        a = np.random.random(500)

        # Building the gradient graph doubles as a smoke test.
        theano.function([x], T.grad(T.sum(diff(x)), x))
        utt.verify_grad(self.op, [a])

        for order in range(TestDiffOp.nb):
            theano.function([x], T.grad(T.sum(diff(x, n=order)), x))
            utt.verify_grad(DiffOp(n=order), [a])
from extra_ops import (DiffOp, diff,
BinCountOp, bincount)
import theano
import numpy as np
from theano import tensor as T
from theano.tests import unittest_tools as utt
class DiffOp(theano.Op):
    """Calculate the n-th order discrete difference along a given axis.

    The first order difference is given by out[i] = a[i + 1] - a[i]
    along the given axis; higher order differences are calculated by
    applying diff recursively.  Wraps numpy.diff.

    Parameter:
    x -- Input tensor.

    Keyword arguments:
    n -- The number of times values are differenced, default is 1.
    axis -- Axis along which to difference, default is -1 (last axis).
    """

    def __init__(self, n=1, axis=-1):
        self.n = n
        self.axis = axis

    def __eq__(self, other):
        return (type(self) == type(other) and
                self.n == other.n and
                self.axis == other.axis)

    def __hash__(self):
        return hash(type(self)) ^ hash(self.n) ^ hash(self.axis)

    def make_node(self, x):
        x = T.as_tensor_variable(x)
        return theano.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage):
        x = inputs[0]
        z = output_storage[0]
        z[0] = np.diff(x, n=self.n, axis=self.axis)

    def grad(self, inputs, outputs_gradients):
        # The gradient is only implemented for 1-d inputs.
        x = inputs[0]
        if x.ndim != 1:
            # BUG FIX: the original adjacent string literals were
            # missing a separating space ("with" + "number" rendered
            # as "withnumber").
            raise TypeError("Grad is not implemented for inputs with "
                            "number of dimension other than 1.")
        z = outputs_gradients[0]

        def _grad_helper(z):
            # Adjoint of one differencing pass: prepend a zero, append
            # a zero, and subtract.
            pre = T.concatenate([[0.], z])
            app = T.concatenate([z, [0.]])
            return pre - app

        for k in range(self.n):
            z = _grad_helper(z)
        return [z]

    def infer_shape(self, node, ins_shapes):
        i0_shapes = ins_shapes[0]
        out_shape = list(i0_shapes)
        # Each differencing pass shortens the chosen axis by one.
        out_shape[self.axis] = out_shape[self.axis] - self.n
        return [out_shape]

    def __str__(self):
        return self.__class__.__name__
def diff(x, n=1, axis=-1):
    """Calculate the n-th order discrete difference along the given axis.

    The first order difference is out[i] = a[i + 1] - a[i] along the
    given axis; higher order differences are computed by applying diff
    recursively.  Wraps numpy.diff.

    Parameter:
    x -- Input tensor.

    Keyword arguments:
    n -- The number of times values are differenced, default is 1.
    axis -- Axis along which to difference, default is -1.
    """
    op = DiffOp(n=n, axis=axis)
    return op(x)
# NOTE(review): the lines below are merged diff residue from a scraped
# version-control web page -- the old and the new revision of
# BinCountOp appear interleaved (duplicate __init__, duplicate
# make_node, duplicate raise/return lines, embedded @@ hunk headers).
# This span is NOT valid Python as-is.  The comments below mark which
# side of the diff each run of lines appears to belong to; confirm
# against the original repository before reconstructing either side.
class BinCountOp(theano.Op):
# Diff hunk header from the scraped page, not Python code.
......@@ -24,50 +104,59 @@ class BinCountOp(theano.Op):
# Tail of the class docstring; its opening was cut off by the hunk
# header above.
minlength -- A minimum number of bins for the output array.
"""
# dtypes accepted for the integer index input of this op.
compatible_type = ('int8', 'int16', 'int32', 'int64',
'uint8', 'uint16', 'uint32', 'uint64')
"""Tuple of all compatible dtype for the parameter of this op."""
# OLD revision: weights were stored on the op instance at
# construction time.
def __init__(self, weights=None, minlength=None):
self.weights = weights
# NEW revision: weights moved to a symbolic input of make_node;
# only minlength remains a constructor argument.
def __init__(self, minlength=None):
self.minlength = minlength
def __eq__(self, other):
return (type(self) == type(other) and
# OLD-revision line: compares the stored weights.
self.weights == other.weights and
self.minlength == other.minlength)
def __hash__(self):
# OLD revision: folds each weight element into the hash.
h = 0
if self.weights != None:
for k in range(len(self.weights)):
h = h ^ hash(self.weights[k])
return hash(type(self)) ^ h ^ hash(self.minlength)
# NEW revision: hash depends on minlength only.
return hash(type(self)) ^ hash(self.minlength)
# OLD-revision signature (single input).
def make_node(self, x):
# NEW-revision signature (weights as a second symbolic input).
def make_node(self, x, weights):
x = T.as_tensor_variable(x)
if x.dtype not in BinCountOp.compatible_type:
# OLD-revision message.
raise TypeError("Inputs must be integers.")
# NEW-revision message.
raise TypeError("Inputs dtype must be an integer.")
if x.ndim != 1:
raise TypeError("Inputs must be of dimension 1.")
# OLD revision: single-input Apply node.
return theano.Apply(self, [x], [x.type()])
# NEW revision: a None weights input becomes a generic Constant;
# otherwise the output takes the weights' type.
if weights is None:
weights = theano.gof.Constant(theano.gof.Generic(), None)
out_type = x.type()
else:
weights = T.as_tensor_variable(weights)
out_type = weights.type()
if weights.ndim != 1:
raise TypeError("Weights cannot have a number of"
"dimension different of 1.")
return theano.Apply(self, [x, weights], [out_type])
def perform(self, node, inputs, output_storage):
x = inputs[0]
# OLD revision: revalidated dtype/ndim at execution time.
if x.dtype not in BinCountOp.compatible_type:
raise TypeError("Inputs must be integers.")
if x.ndim != 1:
raise TypeError("Input must be of dimension 1.")
# NEW revision: weights arrive as the second runtime input.
weights = inputs[1]
z = output_storage[0]
# OLD revision: weights read from the op instance.
z[0] = np.bincount(x, self.weights, self.minlength)
# NEW revision: shape check plus keyword call.
if weights is not None and weights.shape != x.shape:
raise TypeError("All inputs must have the same shape.")
z[0] = np.bincount(x, weights=weights, minlength=self.minlength)
def grad(self, inputs, outputs_gradients):
# Both revisions: bincount is not differentiable.
return [None for i in inputs] # Non differentiable
return [None for i in inputs]
def infer_shape(self, node, ins_shapes):
# OLD-revision lines.
inputs = node.inputs[0]
m = T.max(inputs) + 1
# NEW-revision lines.
x = node.inputs[0]
m = T.max(x) + 1
if self.minlength != None:
# OLD revision used T.max over a stack; NEW uses T.maximum.
m = T.max(T.stack(m, self.minlength))
m = T.maximum(m, self.minlength)
return [[m]]
def __str__(self):
# Second diff hunk header; the __str__ body is not visible here.
......@@ -75,43 +164,23 @@ class BinCountOp(theano.Op):
# OLD revision of the bincount wrapper (weights passed to the
# constructor).  The NEW revision's body appears further down the
# page, after the removed old test class.
def bincount(x, weights=None, minlength=None):
return BinCountOp(weights=weights, minlength=minlength)(x)
class TestBinCountOp(utt.InferShapeTester):
    """Tests for BinCountOp (weights passed at op construction time)."""

    def setUp(self):
        super(TestBinCountOp, self).setUp()
        self.op_class = BinCountOp
        self.op = BinCountOp()

    def test_bincountOp(self):
        x = T.lvector('x')
        a = np.random.random_integers(50, size=(25))
        w = np.random.random((25,))
        f1 = theano.function([x], bincount(x))
        f2 = theano.function([x], bincount(x, weights=w))
        f3 = theano.function([x], bincount(x, minlength=23))
        # BUG FIX: the original asserted on the bound method `.all`
        # (always truthy), so these comparisons were never evaluated.
        assert (np.bincount(a) == f1(a)).all()
        # Weighted counts are floats; compare with a tolerance.
        assert np.allclose(np.bincount(a, weights=w), f2(a))
        assert (np.bincount(a, minlength=23) == f3(a)).all()

    def test_infer_shape(self):
        x = T.lvector('x')
        self._compile_and_check([x],
                                [self.op(x)],
                                [np.random.random_integers(50, size=(25,))],
                                self.op_class)
        w = np.random.random((25,))
        self._compile_and_check([x],
                                [bincount(x, weights=w)],
                                [np.random.random_integers(50, size=(25,))],
                                self.op_class)
        self._compile_and_check([x],
                                [bincount(x, minlength=60)],
                                [np.random.random_integers(50, size=(25,))],
                                self.op_class)
# NOTE(review): diff residue -- this docstring and return statement are
# the body of the NEW revision of bincount(); its header
# "def bincount(x, weights=None, minlength=None):" appears earlier on
# the page, separated from this body by removed old lines.  Not valid
# Python standing alone at module level.
"""Count number of occurrences of each value in array of non-negative ints.
The number of bins (of size 1) is one larger than the largest
value in x. If minlength is specified, there will be at least
this number of bins in the output array (though it will be longer
if necessary, depending on the contents of x). Each bin gives the
number of occurrences of its index value in x. If weights is
specified the input array is weighted by it, i.e. if a value n
is found at position i, out[n] += weight[i] instead of out[n] += 1.
Wraping of numpy.bincount
Parameter:
x -- 1 dimension, nonnegative ints
Keywords arguments:
weights -- Weights, array of the same shape as x.
minlength -- A minimum number of bins for the output array.
"""
# NEW revision: weights forwarded as a symbolic input, not stored on
# the op.
return BinCountOp(minlength=minlength)(x, weights)
import theano
import numpy as np
from theano import tensor as T
from theano.tests import unittest_tools as utt
from theano.extra_ops import *
class TestBinCountOp(utt.InferShapeTester):
    """Tests for BinCountOp (weights passed as a symbolic input)."""

    def setUp(self):
        super(TestBinCountOp, self).setUp()
        self.op_class = BinCountOp
        self.op = BinCountOp()

    def test_bincountOp(self):
        x = T.lvector('x')
        w = T.dvector('w')
        a = np.random.random_integers(50, size=(25))
        weights = np.random.random((25,))

        f1 = theano.function([x], bincount(x))
        f2 = theano.function([x, w], bincount(x, weights=w))
        f3 = theano.function([x], bincount(x, minlength=23))
        f4 = theano.function([x], bincount(x, minlength=5))

        assert (np.bincount(a) == f1(a)).all()
        assert np.allclose(np.bincount(a, weights=weights), f2(a, weights))
        assert (np.bincount(a, minlength=23) == f3(a)).all()
        # BUG FIX: this previously asserted against f3 (minlength=23),
        # so the compiled f4 function (minlength=5) was never exercised.
        assert (np.bincount(a, minlength=5) == f4(a)).all()

    def test_infer_shape(self):
        x = T.lvector('x')
        self._compile_and_check([x],
                                [bincount(x)],
                                [np.random.random_integers(50, size=(25,))],
                                self.op_class)
        weights = np.random.random((25,))
        self._compile_and_check([x],
                                [bincount(x, weights=weights)],
                                [np.random.random_integers(50, size=(25,))],
                                self.op_class)
        self._compile_and_check([x],
                                [bincount(x, minlength=60)],
                                [np.random.random_integers(50, size=(25,))],
                                self.op_class)
        self._compile_and_check([x],
                                [bincount(x, minlength=5)],
                                [np.random.random_integers(50, size=(25,))],
                                self.op_class)
class TestDiffOp(utt.InferShapeTester):
    # Number of differencing orders n swept in each test.
    nb = 10

    def setUp(self):
        super(TestDiffOp, self).setUp()
        self.op_class = DiffOp
        self.op = DiffOp()

    def test_diffOp(self):
        x = T.dmatrix('x')
        a = np.random.random((30, 50))

        f = theano.function([x], diff(x))
        assert np.allclose(np.diff(a), f(a))

        # Exercise every axis of the input at every order up to nb.
        for ax in range(len(a.shape)):
            for order in range(TestDiffOp.nb):
                g = theano.function([x], diff(x, n=order, axis=ax))
                assert np.allclose(np.diff(a, n=order, axis=ax), g(a))

    def test_infer_shape(self):
        x = T.dmatrix('x')
        a = np.random.random((30, 50))

        self._compile_and_check([x], [self.op(x)], [a], self.op_class)

        for ax in range(len(a.shape)):
            for order in range(TestDiffOp.nb):
                self._compile_and_check([x],
                                        [diff(x, n=order, axis=ax)],
                                        [a],
                                        self.op_class)

    def test_grad(self):
        x = T.vector('x')
        a = np.random.random(500)

        # Building the gradient graph doubles as a smoke test.
        theano.function([x], T.grad(T.sum(diff(x)), x))
        utt.verify_grad(self.op, [a])

        for order in range(TestDiffOp.nb):
            theano.function([x], T.grad(T.sum(diff(x, n=order)), x))
            utt.verify_grad(DiffOp(n=order), [a])
Markdown formatting is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment