Commit 8bb23735 authored by nouiz

Merge pull request #663 from larseeri/new_ops

Add the Theano op Bartlett, FillDiagonal and Fourier.
...@@ -556,12 +556,13 @@ class numeric_grad(object): ...@@ -556,12 +556,13 @@ class numeric_grad(object):
numpy.dtype('float64'): 1e-7, numpy.dtype('float64'): 1e-7,
numpy.dtype('float32'): 3e-4} numpy.dtype('float32'): 3e-4}
def __init__(self, f, pt, eps=None): def __init__(self, f, pt, eps=None, out_type=None):
"""Return the gradient of f at pt. """Return the gradient of f at pt.
:param f: a differentiable function such that f(*pt) is a scalar :param f: a differentiable function such that f(*pt) is a scalar
:param pt: an ndarray, a list of ndarrays or tuple of ndarrays :param pt: an ndarray, a list of ndarrays or tuple of ndarrays
:param out_type: dtype of output, if complex (i.e. 'complex32' or
'complex64')
This function computes the gradient by a one-sided finite This function computes the gradient by a one-sided finite
differences of a fixed step size (eps). differences of a fixed step size (eps).
...@@ -595,18 +596,21 @@ class numeric_grad(object): ...@@ -595,18 +596,21 @@ class numeric_grad(object):
# raise TypeError('All function arguments must have same dtype') # raise TypeError('All function arguments must have same dtype')
total_size = __builtin__.sum(prod(sh) for sh in shapes) total_size = __builtin__.sum(prod(sh) for sh in shapes)
working_dtype = __builtin__.min((self.type_eps[dt], dt) working_dtype = __builtin__.min((self.type_eps[dt], dt)
for dt in dtypes)[1] for dt in dtypes)[1]
#create un-initialized memory # create un-initialized memory
x = numpy.ndarray((total_size,), dtype=working_dtype) x = numpy.ndarray((total_size,), dtype=working_dtype)
gx = numpy.ndarray((total_size,), dtype=working_dtype) if (not out_type is None) and (out_type.startswith('complex')):
gx = numpy.ndarray((total_size,), dtype=out_type)
else:
gx = numpy.ndarray((total_size,), dtype=working_dtype)
if eps is None: if eps is None:
eps = __builtin__.max(self.type_eps[dt] for dt in dtypes) eps = __builtin__.max(self.type_eps[dt] for dt in dtypes)
#set up aliases so that apt[i] is backed by memory in x # set up aliases so that apt[i] is backed by memory in x
# and self.gf is backed by memory in gx # and self.gf is backed by memory in gx
cur_pos = 0 cur_pos = 0
self.gf = [] self.gf = []
...@@ -629,8 +633,13 @@ class numeric_grad(object): ...@@ -629,8 +633,13 @@ class numeric_grad(object):
x[i] += eps x[i] += eps
f_eps = f(*apt) f_eps = f(*apt)
gx[i] = numpy.asarray((f_eps - f_x) / eps) # TODO: remove this when it is clear that the next
# replacemement does not pose problems of its own. It was replaced
# for its inability to handle complex variables.
# gx[i] = numpy.asarray((f_eps - f_x) / eps)
gx[i] = ((f_eps - f_x) / eps)
if packed_pt: if packed_pt:
self.gf = self.gf[0] self.gf = self.gf[0]
...@@ -712,7 +721,7 @@ class numeric_grad(object): ...@@ -712,7 +721,7 @@ class numeric_grad(object):
return (max_arg, pos[max_arg], abs_errs[max_arg], rel_errs[max_arg]) return (max_arg, pos[max_arg], abs_errs[max_arg], rel_errs[max_arg])
def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, out_type=None, abs_tol=None,
rel_tol=None, mode=None, cast_to_output_type=False): rel_tol=None, mode=None, cast_to_output_type=False):
""" Test a gradient by Finite Difference Method. Raise error on failure. """ Test a gradient by Finite Difference Method. Raise error on failure.
...@@ -736,6 +745,8 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, ...@@ -736,6 +745,8 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
of sum(u * fun) at pt of sum(u * fun) at pt
:param eps: stepsize used in the Finite Difference Method (Default :param eps: stepsize used in the Finite Difference Method (Default
None is type-dependent) None is type-dependent)
:param out_type: dtype of output, if complex (i.e. 'complex32' or
'complex64')
:param abs_tol: absolute tolerance used as threshold for gradient :param abs_tol: absolute tolerance used as threshold for gradient
comparison comparison
:param rel_tol: relative tolerance used as threshold for gradient :param rel_tol: relative tolerance used as threshold for gradient
...@@ -761,7 +772,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, ...@@ -761,7 +772,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
raise TypeError(('verify_grad can work only with floating point ' raise TypeError(('verify_grad can work only with floating point '
'inputs, but input %i has dtype "%s".') % (i, p.dtype)) 'inputs, but input %i has dtype "%s".') % (i, p.dtype))
_type_tol = dict( # relativ error tolerances for different types _type_tol = dict( # relative error tolerances for different types
float32=1e-2, float32=1e-2,
float64=1e-4) float64=1e-4)
...@@ -839,7 +850,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, ...@@ -839,7 +850,7 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None,
grad_fn = function(tensor_pt, symbolic_grad) grad_fn = function(tensor_pt, symbolic_grad)
for test_num in xrange(n_tests): for test_num in xrange(n_tests):
num_grad = numeric_grad(cost_fn, [p.copy() for p in pt], eps) num_grad = numeric_grad(cost_fn, [p.copy() for p in pt], eps, out_type)
analytic_grad = grad_fn(*[p.copy() for p in pt]) analytic_grad = grad_fn(*[p.copy() for p in pt])
......
...@@ -1170,27 +1170,21 @@ class Mul(ScalarOp): ...@@ -1170,27 +1170,21 @@ class Mul(ScalarOp):
def grad(self, inputs, (gz, )): def grad(self, inputs, (gz, )):
retval = [] retval = []
for input in inputs: input_type = theano.tensor.as_tensor_variable(inputs).type
if input.type in continuous_types: if input_type in discrete_types:
if gz.type in complex_types: retval = None
# zr+zi = (xr + xi)(yr + yi) elif input_type in complex_types or gz.type in complex_types:
# zr+zi = (xr*yr - xi*yi) + (xr yi + xi yr ) for input in inputs:
otherprod = mul(*(utils.difference(inputs, [input]))) retval += [mul(*([gz] +
yr = real(otherprod) utils.difference(inputs, [input])))]
yi = imag(otherprod) else:
if input.type in complex_types: for input in inputs:
retval += [complex(yr * real(gz) + yi * imag(gz), retval += [cast(mul(*([gz] +
yr * imag(gz) - yi * real(gz))] utils.difference(inputs, [input]))),
else: input_type.dtype)]
retval += [cast(yr * real(gz) + yi * imag(gz),
input.type.dtype)]
else:
retval += [cast(mul(*([gz] + utils.difference(inputs,
[input]))),
input.type.dtype)]
else:
retval += [None]
return retval return retval
mul = Mul(upcast_out, name='mul') mul = Mul(upcast_out, name='mul')
......
import theano
import numpy as np import numpy as np
import numpy
import theano
import basic import basic
from theano import gof, tensor, function, scalar
from theano.sandbox.linalg.ops import diag
class DiffOp(theano.Op): class DiffOp(theano.Op):
...@@ -348,3 +352,143 @@ def repeat(x, repeats, axis=None): ...@@ -348,3 +352,143 @@ def repeat(x, repeats, axis=None):
""" """
return RepeatOp(axis=axis)(x, repeats) return RepeatOp(axis=axis)(x, repeats)
class Bartlett(gof.Op):
    """Theano Op wrapping ``numpy.bartlett``.

    Produces the Bartlett spectral window in the time domain: a
    triangular-shaped window whose end points are exactly zero.  It is
    commonly used in signal processing to taper a signal without
    generating much ripple in the frequency domain.

    input : integer scalar -- number of points in the output window; if
        zero or less, an empty vector is returned.
    output : vector of doubles -- the triangular window, normalized so
        its maximum is one (the value one appears only when the number
        of samples is odd), with the first and last samples equal to
        zero.
    """

    def __eq__(self, other):
        # The op carries no parameters, so equality is by type only.
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, M):
        M = tensor.as_tensor_variable(M)
        if M.ndim != 0:
            raise TypeError('%s only works on scalar input'
                            % self.__class__.__name__)
        # dtype is a theano attribute here
        is_integer = (M.dtype.startswith('int') or
                      M.dtype.startswith('uint'))
        if not is_integer:
            raise TypeError('%s only works on integer input'
                            % self.__class__.__name__)
        return gof.Apply(self, [M], [tensor.dvector()])

    def perform(self, node, inputs, out_):
        (M,) = inputs
        (out,) = out_
        out[0] = numpy.bartlett(M)

    def infer_shape(self, node, in_shapes):
        m = node.inputs[0]
        # A negative requested length yields an empty vector: clamp the
        # symbolic length at zero.
        length = tensor.switch(tensor.lt(m, 0),
                               tensor.cast(0, m.dtype), m)
        return [[length]]

    def grad(self, inputs, output_grads):
        # The input is an integer count, so no gradient is defined.
        return [None for i in inputs]


bartlett = Bartlett()
class FillDiagonal(gof.Op):
    """
    An instance of this class returns a copy of an array with all elements of
    the main diagonal set to a specified scalar value.

    inputs:

    a : Rectangular array of at least two dimensions.
    val : Scalar value to fill the diagonal whose type must be compatible with
    that of array 'a' (i.e. 'val' cannot be viewed as an upcast of 'a').

    output:

    An array identical to 'a' except that its main diagonal is filled with
    scalar 'val'. (For an array 'a' with a.ndim >= 2, the main diagonal is the
    list of locations a[i, i, ..., i] (i.e. with indices all identical).)

    Support rectangular matrix and tensor with more than 2 dimensions
    if the latter have all dimensions equal.
    """

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        # BUG FIX: this method was misspelled ``__hash_``, so the class
        # silently kept the default identity-based hash while comparing
        # equal via the custom ``__eq__`` above -- equal op instances
        # could then land in different buckets of hashed containers.
        return hash(type(self))

    def __str__(self):
        return self.__class__.__name__

    def infer_shape(self, node, in_shapes):
        # The output always has exactly the shape of the input array.
        return [in_shapes[0]]

    def make_node(self, a, val):
        a = tensor.as_tensor_variable(a)
        val = tensor.as_tensor_variable(val)
        if a.ndim < 2:
            raise TypeError('%s: first parameter must have at least'
                            ' two dimensions' % self.__class__.__name__)
        elif val.ndim != 0:
            raise TypeError('%s: second parameter must be a scalar'
                            % self.__class__.__name__)
        val = tensor.cast(val, dtype=scalar.upcast(a.dtype, val.dtype))
        if val.dtype != a.dtype:
            # If upcasting val with a's dtype changed val's dtype, then
            # val cannot be represented in a's dtype without loss.
            raise TypeError('%s: type of second parameter must be compatible'
                            ' with first\'s' % self.__class__.__name__)
        return gof.Apply(self, [a, val], [a.type()])

    def perform(self, node, inputs, output_storage):
        a = inputs[0].copy()
        val = inputs[1]
        if a.ndim == 2:
            # numpy.fill_diagonal up to date(including 1.6.2) have a
            # bug for tall matrix.
            # For 2-d arrays, we accept rectangular ones.
            step = a.shape[1] + 1
            end = a.shape[1] * a.shape[1]
            # Write the value out into the diagonal.
            a.flat[:end:step] = val
        else:
            numpy.fill_diagonal(a, val)
        output_storage[0][0] = a

    def grad(self, inp, cost_grad):
        """
        Note: The gradient is currently implemented for matrices
        only.
        """
        a, val = inp
        grad = cost_grad[0]
        if (a.dtype.startswith('complex')):
            # Gradient through complex arrays is not supported: opt out.
            return [None, None]
        elif a.ndim > 2:
            raise NotImplementedError('%s: gradient is currently implemented'
                                      ' for matrices only'
                                      % self.__class__.__name__)
        wr_a = fill_diagonal(grad, 0)  # valid for any number of dimensions
        wr_val = diag(grad).sum()  # diag is only valid for matrices
        return [wr_a, wr_val]


fill_diagonal = FillDiagonal()
import theano
import numpy
import math
from theano import gof, tensor, function, scalar
from theano.sandbox.linalg.ops import diag
class Fourier(gof.Op):
    """
    An instance of this class returns a finite fourier transform calculated
    along one dimension of an input array.

    inputs:

    a : Array of at least one dimension. Can be complex.
    n : Integer, optional. Length of the transformed axis of the output. If n
    is smaller than the length of the input, the input is cropped. If it is
    larger, the input is padded with zeros. If n is not given, the length of
    the input (along the axis specified by axis) is used.
    axis : Integer, optional. Axis over which to compute the FFT. If not
    supplied, the last axis is used.

    output:

    Complex array. The input array, transformed along the axis
    indicated by 'axis' or along the last axis if 'axis' is not specified. It
    is truncated or zero-padded as required if 'n' is specified.

    (From numpy.fft.fft's documentation:)
    The values in the output follow so-called standard order. If A = fft(a, n),
    then A[0] contains the zero-frequency term (the mean of the signal), which
    is always purely real for real inputs. Then A[1:n/2] contains the
    positive-frequency terms, and A[n/2+1:] contains the negative-frequency
    terms, in order of decreasingly negative frequency. For an even number of
    input points, A[n/2] represents both positive and negative Nyquist
    frequency, and is also purely real for real input. For an odd number of
    input points, A[(n-1)/2] contains the largest positive frequency, while
    A[(n+1)/2] contains the largest negative frequency.
    """

    def __eq__(self, other):
        # The op carries no parameters: any two instances are equivalent.
        return type(self) == type(other)

    def __hash__(self):
        return hash(self.__class__)

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, a, n, axis):
        # Validate inputs and build the Apply node.  `n` and `axis` may
        # be Python None (use the default), Python ints, or symbolic
        # integer scalars; the output is always complex128 with the
        # broadcastable pattern of `a`.
        a = tensor.as_tensor_variable(a)
        if a.ndim < 1:
            raise TypeError('%s: input must be an array, not a scalar' %
                            self.__class__.__name__)
        if axis is None:
            # Default: transform along the last axis.
            axis = a.ndim - 1
            axis = tensor.as_tensor_variable(axis)
        else:
            axis = tensor.as_tensor_variable(axis)
            if (not axis.dtype.startswith('int')) and \
               (not axis.dtype.startswith('uint')):
                raise TypeError('%s: index of the transformed axis must be'
                                ' of type integer' % self.__class__.__name__)
            # The bounds check can only be done at graph-construction
            # time when axis is a compile-time constant.
            elif axis.ndim != 0 or (isinstance(axis, tensor.TensorConstant) and
                                    (axis.data < 0 or axis.data > a.ndim - 1)):
                raise TypeError('%s: index of the transformed axis must be'
                                ' a scalar not smaller than 0 and smaller than'
                                ' dimension of array' % self.__class__.__name__)
        if n is None:
            # Default: use the full (symbolic) length of the chosen axis.
            n = a.shape[axis]
            n = tensor.as_tensor_variable(n)
        else:
            n = tensor.as_tensor_variable(n)
            if (not n.dtype.startswith('int')) and \
               (not n.dtype.startswith('uint')):
                raise TypeError('%s: length of the transformed axis must be'
                                ' of type integer' % self.__class__.__name__)
            elif n.ndim != 0 or (isinstance(n, tensor.TensorConstant) and
                                 n.data < 1):
                raise TypeError('%s: length of the transformed axis must be a'
                                ' strictly positive scalar'
                                % self.__class__.__name__)
        return gof.Apply(self, [a, n, axis], [tensor.TensorType('complex128',
                         a.type.broadcastable)()])

    def infer_shape(self, node, in_shapes):
        shape_a = in_shapes[0]
        n = node.inputs[1]
        axis = node.inputs[2]
        if len(shape_a) == 1:
            # 1-d input: the output length is just n (padded/truncated).
            return [(n,)]
        elif isinstance(axis, tensor.TensorConstant):
            # Constant axis: splice n into the shape at that position.
            out_shape = list(shape_a[0: axis.data]) + [n] +\
                list(shape_a[axis.data + 1:])
        else:
            # Symbolic axis: build the output shape symbolically by
            # stacking the input shape, splicing n in with concatenate,
            # then splitting back into individual scalar shape entries.
            l = len(shape_a)
            shape_a = tensor.stack(*shape_a)
            out_shape = tensor.concatenate((shape_a[0: axis], [n],
                                            shape_a[axis + 1:]))
            n_splits = [1] * l
            out_shape = tensor.split(out_shape, n_splits, l)
            out_shape = [a[0] for a in out_shape]
        return [out_shape]

    def perform(self, node, inputs, output_storage):
        a = inputs[0]
        n = inputs[1]
        axis = inputs[2]
        # Delegate entirely to numpy; n arrives as a 0-d array, hence
        # the int() conversion.
        output_storage[0][0] = numpy.fft.fft(a, n=int(n), axis=axis)

    def grad(self, inputs, cost_grad):
        """
        In defining the gradient, the Finite Fourier Transform is viewed as
        a complex-differentiable function of a complex variable
        """
        a = inputs[0]
        n = inputs[1]
        axis = inputs[2]
        grad = cost_grad[0]
        if not isinstance(axis, tensor.TensorConstant):
            raise NotImplementedError('%s: gradient is currently implemented'
                                      ' only for axis being a Theano constant'
                                      % self.__class__.__name__)
        axis = int(axis.data)
        # notice that the number of actual elements in wrto is independent of
        # possible padding or truncation:
        elem = tensor.arange(0, tensor.shape(a)[axis], 1)
        # accounts for padding:
        freq = tensor.arange(0, n, 1)
        # Matrix of DFT phase factors exp(-2*pi*i*freq*elem/n);
        # contracting the output gradient with it along `axis` maps the
        # gradient back to the input's index space.
        outer = tensor.outer(freq, elem)
        pow_outer = tensor.exp(((-2 * math.pi * 1j) * outer) / (1. * n))
        res = tensor.tensordot(grad, pow_outer, (axis, 0))
        # This would be simpler but not implemented by theano:
        # res = tensor.switch(tensor.lt(n, tensor.shape(a)[axis]),
        # tensor.set_subtensor(res[...,n::], 0, False, False), res)
        # Instead we resort to that to account for truncation:
        # reverse the axis order so the transformed axis comes first,
        # zero the entries past n, then reverse back.
        flip_shape = list(numpy.arange(0, a.ndim)[::-1])
        res = res.dimshuffle(flip_shape)
        res = tensor.switch(tensor.lt(n, tensor.shape(a)[axis]),
                            tensor.set_subtensor(res[n::, ], 0, False, False),
                            res)
        res = res.dimshuffle(flip_shape)
        # insures that gradient shape conforms to input shape:
        out_shape = list(numpy.arange(0, axis)) + [a.ndim - 1] +\
            list(numpy.arange(axis, a.ndim - 1))
        res = res.dimshuffle(*out_shape)
        # n and axis are integer parameters: no gradient for them.
        return [res, None, None]


fft = Fourier()
import theano
import numpy as np import numpy as np
from theano import tensor as T import numpy
from theano.tests import unittest_tools as utt
import theano
from theano.tests import unittest_tools as utt
from theano.tensor.extra_ops import * from theano.tensor.extra_ops import *
from theano import tensor as T
from theano import tensor, function, scalar
class TestBinCountOp(utt.InferShapeTester): class TestBinCountOp(utt.InferShapeTester):
...@@ -185,3 +187,99 @@ class TestRepeatOp(utt.InferShapeTester): ...@@ -185,3 +187,99 @@ class TestRepeatOp(utt.InferShapeTester):
def repeat_(a): def repeat_(a):
return RepeatOp()(a, 3) return RepeatOp()(a, 3)
utt.verify_grad(repeat_, [a]) utt.verify_grad(repeat_, [a])
class TestBartlett(utt.InferShapeTester):
    """Checks the Bartlett op: forward evaluation and shape inference."""

    def setUp(self):
        super(TestBartlett, self).setUp()
        self.op_class = Bartlett
        self.op = bartlett

    def test_perform(self):
        # Compile the op on an int64 scalar and compare against numpy's
        # reference implementation, including empty-window edge cases.
        length = tensor.lscalar()
        fn = function([length], self.op(length))
        n_points = numpy.random.random_integers(3, 50, size=())
        for value in [n_points, 0, -1]:
            assert numpy.allclose(fn(value), numpy.bartlett(value))
        # Unsigned integer input must be accepted too.
        small = numpy.array([17], dtype='uint8')
        assert numpy.allclose(fn(small[0]), numpy.bartlett(small[0]))

    def test_infer_shape(self):
        length = tensor.lscalar()
        # A random window size plus the two smallest boundary cases.
        for value in [numpy.random.random_integers(3, 50, size=()), 0, 1]:
            self._compile_and_check([length], [self.op(length)], [value],
                                    self.op_class)


if __name__ == "__main__":
    runner = TestBartlett('setUp')
    runner.setUp()
    runner.test_perform()
    runner.test_infer_shape()
class TestFillDiagonal(utt.InferShapeTester):
    """Checks FillDiagonal: forward pass, gradient, and shape inference."""

    # Fixed-seed RNG so gradient checks are reproducible.
    rng = numpy.random.RandomState(43)

    def setUp(self):
        super(TestFillDiagonal, self).setUp()
        self.op_class = FillDiagonal
        self.op = fill_diagonal

    def test_perform(self):
        # Square, wide, and tall matrices -- the tall case exercises the
        # op's workaround for numpy's fill_diagonal bug.
        x = tensor.dmatrix()
        y = tensor.dscalar()
        f = function([x, y], fill_diagonal(x, y))
        for shp in [(8, 8), (5, 8), (8, 5)]:
            a = numpy.random.rand(*shp)
            val = numpy.random.rand()
            out = f(a, val)
            # We can't use numpy.fill_diagonal as it is bugged.
            assert numpy.allclose(numpy.diag(out), val)
            assert (out == val).sum() == min(a.shape)

        # test for 3d tensor
        a = numpy.random.rand(3, 3, 3)
        x = tensor.dtensor3()
        y = tensor.dscalar()
        f = function([x, y], fill_diagonal(x, y))
        # Shift val by 10 so it cannot collide with entries of a, which
        # lie in [0, 1) -- the count below relies on that.
        val = numpy.random.rand() + 10
        out = f(a, val)
        # We can't use numpy.fill_diagonal as it is bugged.
        assert out[0, 0, 0] == val
        assert out[1, 1, 1] == val
        assert out[2, 2, 2] == val
        assert (out == val).sum() == min(a.shape)

    def test_gradient(self):
        # Finite-difference gradient check on both tall and wide shapes.
        utt.verify_grad(fill_diagonal, [numpy.random.rand(5, 8),
                                        numpy.random.rand()],
                        n_tests=1, rng=TestFillDiagonal.rng)
        utt.verify_grad(fill_diagonal, [numpy.random.rand(8, 5),
                                        numpy.random.rand()],
                        n_tests=1, rng=TestFillDiagonal.rng)

    def test_infer_shape(self):
        z = tensor.dtensor3()
        x = tensor.dmatrix()
        y = tensor.dscalar()
        self._compile_and_check([x, y], [self.op(x, y)],
                                [numpy.random.rand(8, 5),
                                 numpy.random.rand()],
                                self.op_class)
        self._compile_and_check([z, y], [self.op(z, y)],
                                [numpy.random.rand(8, 8, 8),
                                 numpy.random.rand()],
                                self.op_class)


if __name__ == "__main__":
    t = TestFillDiagonal('setUp')
    t.setUp()
    t.test_perform()
    t.test_gradient()
    t.test_infer_shape()
import numpy
import theano
from theano import tensor
from theano.tests import unittest_tools as utt
from theano.tensor.fourier import Fourier, fft
class TestFourier(utt.InferShapeTester):
    """Checks the Fourier op: forward pass, shape inference, gradient."""

    # Fixed-seed RNG so gradient checks are reproducible.
    rng = numpy.random.RandomState(43)

    def setUp(self):
        super(TestFourier, self).setUp()
        self.op_class = Fourier
        self.op = fft

    def test_perform(self):
        # Compare against numpy's reference FFT with explicit padding
        # (n=10 exceeds the 8 rows of the input) along axis 0.
        a = tensor.dmatrix()
        f = theano.function([a], self.op(a, n=10, axis=0))
        a = numpy.random.rand(8, 6)
        assert numpy.allclose(f(a), numpy.fft.fft(a, 10, 0))

    def test_infer_shape(self):
        a = tensor.dvector()
        self._compile_and_check([a], [self.op(a, 16, 0)],
                                [numpy.random.rand(12)],
                                self.op_class)
        a = tensor.dmatrix()
        # Cover constant and default (None) values for both n and axis.
        for var in [self.op(a, 16, 1), self.op(a, None, 1),
                    self.op(a, 16, None), self.op(a, None, None)]:
            self._compile_and_check([a], [var],
                                    [numpy.random.rand(12, 4)],
                                    self.op_class)
        # axis supplied as a symbolic scalar rather than a constant.
        b = tensor.iscalar()
        for var in [self.op(a, 16, b), self.op(a, None, b)]:
            self._compile_and_check([a, b], [var],
                                    [numpy.random.rand(12, 4), 0],
                                    self.op_class)

    def test_gradient(self):
        # Each wrapper fixes (n, axis) so verify_grad sees a function of
        # the input array alone.
        def fft_test1(a):
            return self.op(a, None, None)

        def fft_test2(a):
            return self.op(a, None, 0)

        def fft_test3(a):
            return self.op(a, 4, None)

        def fft_test4(a):
            return self.op(a, 4, 0)

        pts = [numpy.random.rand(5, 2, 4, 3),
               numpy.random.rand(2, 3, 4),
               numpy.random.rand(2, 5),
               numpy.random.rand(5)]
        for fft_test in [fft_test1, fft_test2, fft_test3, fft_test4]:
            for pt in pts:
                # out_type tells the numeric-gradient helper that the
                # op's output is complex-valued.
                theano.gradient.verify_grad(fft_test, [pt],
                                            n_tests=1, rng=TestFourier.rng,
                                            out_type='complex64')


if __name__ == "__main__":
    t = TestFourier('setUp')
    t.setUp()
    t.test_perform()
    t.test_infer_shape()
    t.test_gradient()
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment