Merge pull request #4535 from slefrancois/fft_gpuarray

Port FFTs to gpuarray backend

Merge pull request #4535 from slefrancois/fft_gpuarray
f0a9781a · abergeron · GitHub · 76afe07b · 7adcb4b9 · f0a9781a
--- a/doc/library/gpuarray/fft.txt
+++ b/doc/library/gpuarray/fft.txt
+.. _libdoc_gpuarray_fft:
+==============================================
+:mod:`gpuarray.fft` -- Fast Fourier Transforms
+==============================================
+Performs Fast Fourier Transforms (FFT) on the GPU.
+FFT gradients are implemented as the opposite Fourier transform of the output gradients.
+.. note ::
+    You must install `scikit-cuda <http://scikit-cuda.readthedocs.io/en/latest>`_
+    to compute Fourier transforms on the GPU.
+.. warning ::
+    The real and imaginary parts of the Fourier domain arrays are stored as a pair of float32
+    arrays, emulating complex64. Since theano has limited support for complex
+    number operations, care must be taken to manually implement operations such as gradients.
+.. automodule:: theano.gpuarray.fft
+   :members: curfft, cuirfft
+For example, the code below performs the real input FFT of a box function, which is a sinc function.
+The absolute value is plotted, since the phase oscillates due to the box function being
+shifted to the middle of the array. The Theano flag ``device=cuda{0,1...}`` must be used.
+.. testcode::
+    import numpy as np
+    import theano
+    import theano.tensor as T
+    from theano.gpuarray import fft
+    x = T.matrix('x', dtype='float32')
+    rfft = fft.curfft(x, norm='ortho')
+    f_rfft = theano.function([x], rfft)
+    N = 1024
+    box = np.zeros((1, N), dtype='float32')
+    box[:, N/2-10: N/2+10] = 1
+    out = f_rfft(box)
+    c_out = np.asarray(out[0, :, 0] + 1j*out[0, :, 1])
+    abs_out = abs(c_out)
+.. testoutput::
+   :hide:
+   :options: +SKIP
+   ...
+.. image:: plot_fft.png
--- a/doc/library/gpuarray/index.txt
+++ b/doc/library/gpuarray/index.txt
@@ -15,5 +15,6 @@
    op
    dnn
+    fft
    type
    extra
--- a/doc/library/gpuarray/plot_fft.png
+++ b/doc/library/gpuarray/plot_fft.png
--- a/doc/library/tensor/fft.txt
+++ b/doc/library/tensor/fft.txt
+.. _libdoc_tensor_fft:
+==============================================
+:mod:`tensor.fft` -- Fast Fourier Transforms
+==============================================
+Performs Fast Fourier Transforms (FFT).
+FFT gradients are implemented as the opposite Fourier transform of the output gradients.
+.. warning ::
+    The real and imaginary parts of the Fourier domain arrays are stored as a pair of float
+    arrays, emulating complex. Since theano has limited support for complex
+    number operations, care must be taken to manually implement operations such as gradients.
+.. automodule:: theano.tensor.fft
+   :members: rfft, irfft
+For example, the code below performs the real input FFT of a box function,
+which is a sinc function. The absolute value is plotted, since the phase
+oscillates due to the box function being shifted to the middle of the array.
+.. testcode::
+    import numpy as np
+    import theano
+    import theano.tensor as T
+    from theano.tensor import fft
+    x = T.matrix('x', dtype='float64')
+    rfft = fft.rfft(x, norm='ortho')
+    f_rfft = theano.function([x], rfft)
+    N = 1024
+    box = np.zeros((1, N), dtype='float64')
+    box[:, N/2-10: N/2+10] = 1
+    out = f_rfft(box)
+    c_out = np.asarray(out[0, :, 0] + 1j*out[0, :, 1])
+    abs_out = abs(c_out)
+.. image:: plot_fft.png
--- a/doc/library/tensor/index.txt
+++ b/doc/library/tensor/index.txt
@@ -29,3 +29,4 @@ They are grouped into the following sections:
    opt
    slinalg
    nlinalg
+    fft
--- a/doc/library/tensor/plot_fft.png
+++ b/doc/library/tensor/plot_fft.png
--- a/theano/gpuarray/__init__.py
+++ b/theano/gpuarray/__init__.py
@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
                   GpuArraySharedVariable, gpuarray_shared_constructor,
                   reg_context, get_context, ContextNotDefined)
 from .basic_ops import as_gpuarray_variable
-from . import dnn, opt, nerv, extra_ops
+from . import fft, dnn, opt, nerv, extra_ops
 def transfer(x, target):
    try:

--- a/theano/gpuarray/fft.py
+++ b/theano/gpuarray/fft.py
--- a/theano/gpuarray/tests/test_fft.py
+++ b/theano/gpuarray/tests/test_fft.py
+from __future__ import absolute_import, print_function, division
+import unittest
+import numpy as np
+import theano
+import theano.tensor as T
+from theano.tests import unittest_tools as utt
+import theano.gpuarray.fft
+import numpy.fft
+from .config import mode_with_gpu
+# Skip tests if pygpu is not available.
+from nose.plugins.skip import SkipTest
+from theano.gpuarray.fft import pygpu_available, scikits_cuda_available, pycuda_available
+if not pygpu_available:  # noqa
+    raise SkipTest('Optional package pygpu not available')
+if not scikits_cuda_available:  # noqa
+    raise SkipTest('Optional package scikits.cuda not available')
+if not pycuda_available:  # noqa
+    raise SkipTest('Optional package pycuda not available')
+# Transform sizes
+N = 32
+class TestFFT(unittest.TestCase):
+    def test_1Dfft(self):
+        inputs_val = np.random.random((1, N)).astype('float32')
+        x = T.matrix('x', dtype='float32')
+        rfft = theano.gpuarray.fft.curfft(x)
+        f_rfft = theano.function([x], rfft, mode=mode_with_gpu)
+        res_rfft = f_rfft(inputs_val)
+        res_rfft_comp = (np.asarray(res_rfft[:, :, 0]) +
+                         1j * np.asarray(res_rfft[:, :, 1]))
+        rfft_ref = numpy.fft.rfft(inputs_val, axis=1)
+        utt.assert_allclose(rfft_ref, res_rfft_comp)
+        m = rfft.type()
+        irfft = theano.gpuarray.fft.cuirfft(m)
+        f_irfft = theano.function([m], irfft, mode=mode_with_gpu)
+        res_irfft = f_irfft(res_rfft)
+        utt.assert_allclose(inputs_val, np.asarray(res_irfft))
+        # The numerical gradient of the FFT is sensitive, must set large
+        # enough epsilon to get good accuracy.
+        eps = 1e-1
+        def f_rfft(inp):
+            return theano.gpuarray.fft.curfft(inp)
+        inputs_val = np.random.random((1, N)).astype('float32')
+        utt.verify_grad(f_rfft, [inputs_val], eps=eps)
+        def f_irfft(inp):
+            return theano.gpuarray.fft.cuirfft(inp)
+        inputs_val = np.random.random((1, N // 2 + 1, 2)).astype('float32')
+        utt.verify_grad(f_irfft, [inputs_val], eps=eps)
+    def test_rfft(self):
+        inputs_val = np.random.random((1, N, N)).astype('float32')
+        inputs = theano.shared(inputs_val)
+        rfft = theano.gpuarray.fft.curfft(inputs)
+        f_rfft = theano.function([], rfft, mode=mode_with_gpu)
+        res_rfft = f_rfft()
+        res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
+                         1j * np.asarray(res_rfft[:, :, :, 1]))
+        rfft_ref = numpy.fft.rfftn(inputs_val, axes=(1, 2))
+        utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4)
+    def test_irfft(self):
+        inputs_val = np.random.random((1, N, N)).astype('float32')
+        inputs = theano.shared(inputs_val)
+        fft = theano.gpuarray.fft.curfft(inputs)
+        f_fft = theano.function([], fft, mode=mode_with_gpu)
+        res_fft = f_fft()
+        m = fft.type()
+        ifft = theano.gpuarray.fft.cuirfft(m)
+        f_ifft = theano.function([m], ifft, mode=mode_with_gpu)
+        res_ifft = f_ifft(res_fft)
+        utt.assert_allclose(inputs_val, np.asarray(res_ifft))
+        inputs_val = numpy.random.random((1, N, N, 2)).astype('float32')
+        inputs = theano.shared(inputs_val)
+        irfft = theano.gpuarray.fft.cuirfft(inputs)
+        f_irfft = theano.function([], irfft)
+        res_irfft = f_irfft()
+        inputs_ref = inputs_val[..., 0] + inputs_val[..., 1] * 1j
+        irfft_ref = np.fft.irfftn(inputs_ref, axes=(1, 2))
+        utt.assert_allclose(irfft_ref, res_irfft, atol=1e-4, rtol=1e-4)
+    def test_type(self):
+        inputs_val = np.random.random((1, N)).astype('float64')
+        inputs = theano.shared(inputs_val)
+        with self.assertRaises(AssertionError):
+            theano.gpuarray.fft.curfft(inputs)
+        with self.assertRaises(AssertionError):
+            theano.gpuarray.fft.cuirfft(inputs)
+    def test_norm(self):
+        inputs_val = np.random.random((1, N, N)).astype('float32')
+        inputs = theano.shared(inputs_val)
+        # Unitary normalization
+        rfft = theano.gpuarray.fft.curfft(inputs, norm='ortho')
+        f_rfft = theano.function([], rfft, mode=mode_with_gpu)
+        res_rfft = f_rfft()
+        res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
+                         1j * np.asarray(res_rfft[:, :, :, 1]))
+        rfft_ref = numpy.fft.rfftn(inputs_val, axes=(1, 2))
+        utt.assert_allclose(rfft_ref / N, res_rfft_comp, atol=1e-4, rtol=1e-4)
+        # No normalization
+        rfft = theano.gpuarray.fft.curfft(inputs, norm='no_norm')
+        f_rfft = theano.function([], rfft, mode=mode_with_gpu)
+        res_rfft = f_rfft()
+        res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
+                         1j * np.asarray(res_rfft[:, :, :, 1]))
+        utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4)
+        # Inverse FFT inputs
+        inputs_val = np.random.random((1, N, N // 2 + 1, 2)).astype('float32')
+        inputs = theano.shared(inputs_val)
+        inputs_ref = inputs_val[:, :, :, 0] + 1j * inputs_val[:, :, :, 1]
+        # Unitary normalization inverse FFT
+        irfft = theano.gpuarray.fft.cuirfft(inputs, norm='ortho')
+        f_irfft = theano.function([], irfft, mode=mode_with_gpu)
+        res_irfft = f_irfft()
+        irfft_ref = numpy.fft.irfftn(inputs_ref, axes=(1, 2))
+        utt.assert_allclose(irfft_ref * N, res_irfft, atol=1e-4, rtol=1e-4)
+        # No normalization inverse FFT
+        irfft = theano.gpuarray.fft.cuirfft(inputs, norm='no_norm')
+        f_irfft = theano.function([], irfft, mode=mode_with_gpu)
+        res_irfft = f_irfft()
+        utt.assert_allclose(irfft_ref * N**2, res_irfft, atol=1e-4, rtol=1e-4)
+    def test_grad(self):
+        # The numerical gradient of the FFT is sensitive, must set large
+        # enough epsilon to get good accuracy.
+        eps = 1e-1
+        def f_rfft(inp):
+            return theano.gpuarray.fft.curfft(inp)
+        inputs_val = np.random.random((1, N, N)).astype('float32')
+        utt.verify_grad(f_rfft, [inputs_val], eps=eps)
+        def f_irfft(inp):
+            return theano.gpuarray.fft.cuirfft(inp)
+        inputs_val = np.random.random((1, N, N // 2 + 1, 2)).astype('float32')
+        utt.verify_grad(f_irfft, [inputs_val], eps=eps)
+        def f_rfft(inp):
+            return theano.gpuarray.fft.curfft(inp, norm='ortho')
+        inputs_val = np.random.random((1, N, N)).astype('float32')
+        utt.verify_grad(f_rfft, [inputs_val], eps=eps)
+        def f_irfft(inp):
+            return theano.gpuarray.fft.cuirfft(inp, norm='no_norm')
+        inputs_val = np.random.random((1, N, N // 2 + 1, 2)).astype('float32')
+        utt.verify_grad(f_irfft, [inputs_val], eps=eps)
+    def test_odd(self):
+        M = N - 1
+        inputs_val = np.random.random((1, M, M)).astype('float32')
+        inputs = theano.shared(inputs_val)
+        rfft = theano.gpuarray.fft.curfft(inputs)
+        f_rfft = theano.function([], rfft, mode=mode_with_gpu)
+        res_rfft = f_rfft()
+        res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
+                         1j * np.asarray(res_rfft[:, :, :, 1]))
+        rfft_ref = numpy.fft.rfftn(inputs_val, s=(M, M), axes=(1, 2))
+        utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4)
+        m = rfft.type()
+        ifft = theano.gpuarray.fft.cuirfft(m, is_odd=True)
+        f_ifft = theano.function([m], ifft, mode=mode_with_gpu)
+        res_ifft = f_ifft(res_rfft)
+        utt.assert_allclose(inputs_val, np.asarray(res_ifft))
+        inputs_val = np.random.random((1, M, M // 2 + 1, 2)).astype('float32')
+        inputs = theano.shared(inputs_val)
+        irfft = theano.gpuarray.fft.cuirfft(inputs, norm='ortho', is_odd=True)
+        f_irfft = theano.function([], irfft, mode=mode_with_gpu)
+        res_irfft = f_irfft()
+        inputs_ref = inputs_val[:, :, :, 0] + 1j * inputs_val[:, :, :, 1]
+        irfft_ref = numpy.fft.irfftn(
+            inputs_ref, s=(M, M), axes=(1, 2), norm='ortho')
+        utt.assert_allclose(irfft_ref, res_irfft, atol=1e-4, rtol=1e-4)
+        # The numerical gradient of the FFT is sensitive, must set large
+        # enough epsilon to get good accuracy.
+        eps = 1e-1
+        def f_rfft(inp):
+            return theano.gpuarray.fft.curfft(inp)
+        inputs_val = np.random.random((1, M, M)).astype('float32')
+        utt.verify_grad(f_rfft, [inputs_val], eps=eps)
+        def f_irfft(inp):
+            return theano.gpuarray.fft.cuirfft(inp, is_odd=True)
+        inputs_val = np.random.random((1, M, M // 2 + 1, 2)).astype('float32')
+        utt.verify_grad(f_irfft, [inputs_val], eps=eps)
+        def f_rfft(inp):
+            return theano.gpuarray.fft.curfft(inp, norm='ortho')
+        inputs_val = np.random.random((1, M, M)).astype('float32')
+        utt.verify_grad(f_rfft, [inputs_val], eps=eps)
+        def f_irfft(inp):
+            return theano.gpuarray.fft.cuirfft(inp, norm='no_norm', is_odd=True)
+        inputs_val = np.random.random((1, M, M // 2 + 1, 2)).astype('float32')
+        utt.verify_grad(f_irfft, [inputs_val], eps=eps)
+    def test_params(self):
+        inputs_val = numpy.random.random((1, N)).astype('float32')
+        inputs = theano.shared(inputs_val)
+        self.assertRaises(ValueError, theano.gpuarray.fft.curfft, inputs, norm=123)
+        inputs_val = numpy.random.random((1, N // 2 + 1, 2)).astype('float32')
+        inputs = theano.shared(inputs_val)
+        self.assertRaises(ValueError, theano.gpuarray.fft.cuirfft, inputs, norm=123)
+        self.assertRaises(ValueError, theano.gpuarray.fft.cuirfft, inputs, is_odd=123)
--- a/theano/sandbox/fourier.py
+++ b/theano/sandbox/fourier.py
@@ -12,6 +12,14 @@ from six.moves import xrange
 from theano import tensor
 from theano.gof import Op, Apply, generic
+# This module will soon be deprecated.
+import warnings
+message = ("The module theano.sandbox.fourier will soon be deprecated."
+           " Please use theano.tensor.fft, which supports gradients and "
+           "automatic optimization transfers to the GPU ops.")
+warnings.warn(message)
 class GradTodo(Op):
    # TODO : need description for class

--- a/theano/tensor/fft.py
+++ b/theano/tensor/fft.py
+from __future__ import absolute_import, print_function, division
+import numpy as np
+from theano import gof
+import theano.tensor as T
+from theano.gradient import DisconnectedType
+class RFFTOp(gof.Op):
+    __props__ = ()
+    def output_type(self, inp):
+        # add extra dim for real/imag
+        return T.TensorType(inp.dtype,
+                            broadcastable=[False] * (inp.type.ndim + 1))
+    def make_node(self, a, s=None):
+        a = T.as_tensor_variable(a)
+        if a.ndim < 2:
+            raise TypeError('%s: input must have dimension > 2, with first dimension batches' %
+                            self.__class__.__name__)
+        if s is None:
+            s = a.shape[1:]
+            s = T.as_tensor_variable(s)
+        else:
+            s = T.as_tensor_variable(s)
+            if (not s.dtype.startswith('int')) and \
+               (not s.dtype.startswith('uint')):
+                raise TypeError('%s: length of the transformed axis must be'
+                                ' of type integer' % self.__class__.__name__)
+        return gof.Apply(self, [a, s], [self.output_type(a)()])
+    def perform(self, node, inputs, output_storage):
+        a = inputs[0]
+        s = inputs[1]
+        A = np.fft.rfftn(a, s=tuple(s))
+        # Format output with two extra dimensions for real and imaginary
+        # parts.
+        out = np.zeros(A.shape + (2,), dtype=a.dtype)
+        out[..., 0], out[..., 1] = np.real(A), np.imag(A)
+        output_storage[0][0] = out
+    def grad(self, inputs, output_grads):
+        gout, = output_grads
+        s = inputs[1]
+        # Divide the last dimension of the output gradients by 2, they are
+        # double-counted by the real-IFFT due to symmetry, except the first
+        # and last elements (for even transforms) which are unique.
+        idx = [slice(None)] * (gout.ndim - 2) \
+            + [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(None)]
+        gout = T.set_subtensor(gout[idx], gout[idx] * 0.5)
+        return [irfft_op(gout, s), DisconnectedType()()]
+    def connection_pattern(self, node):
+        # Specificy that shape input parameter has no connection to graph and gradients.
+        return [[True], [False]]
+rfft_op = RFFTOp()
+class IRFFTOp(gof.Op):
+    __props__ = ()
+    def output_type(self, inp):
+        # remove extra dim for real/imag
+        return T.TensorType(inp.dtype,
+                            broadcastable=[False] * (inp.type.ndim - 1))
+    def make_node(self, a, s=None):
+        a = T.as_tensor_variable(a)
+        if a.ndim < 3:
+            raise TypeError('%s: input must have dimension >= 3,  with ' %
+                            self.__class__.__name__ +
+                            'first dimension batches and last real/imag parts')
+        if s is None:
+            s = a.shape[1:-1]
+            s = T.set_subtensor(s[-1], (s[-1] - 1) * 2)
+            s = T.as_tensor_variable(s)
+        else:
+            s = T.as_tensor_variable(s)
+            if (not s.dtype.startswith('int')) and \
+               (not s.dtype.startswith('uint')):
+                raise TypeError('%s: length of the transformed axis must be'
+                                ' of type integer' % self.__class__.__name__)
+        return gof.Apply(self, [a, s], [self.output_type(a)()])
+    def perform(self, node, inputs, output_storage):
+        a = inputs[0]
+        s = inputs[1]
+        # Reconstruct complex array from two float dimensions
+        inp = a[..., 0] + 1j * a[..., 1]
+        out = np.fft.irfftn(inp, s=tuple(s))
+        # Remove numpy's default normalization
+        # Cast to input type (numpy outputs float64 by default)
+        output_storage[0][0] = (out * s.prod()).astype(a.dtype)
+    def grad(self, inputs, output_grads):
+        gout, = output_grads
+        s = inputs[1]
+        gf = rfft_op(gout, s)
+        # Multiply the last dimension of the gradient by 2, they represent
+        # both positive and negative frequencies, except the first
+        # and last elements (for even transforms) which are unique.
+        idx = [slice(None)] * (gf.ndim - 2) \
+            + [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(None)]
+        gf = T.set_subtensor(gf[idx], gf[idx] * 2)
+        return [gf, DisconnectedType()()]
+    def connection_pattern(self, node):
+        # Specificy that shape input parameter has no connection to graph and gradients.
+        return [[True], [False]]
+irfft_op = IRFFTOp()
+def rfft(inp, norm=None):
+    """
+    Performs the fast Fourier transform of a real-valued input.
+    The input must be a real-valued variable of dimensions (m, ..., n).
+    It performs FFTs of size (..., n) on m batches.
+    The output is a tensor of dimensions (m, ..., n//2+1, 2). The second to
+    last dimension of the output contains the n//2+1 non-trivial elements of
+    the real-valued FFTs. The real and imaginary parts are stored as a pair of
+    float arrays.
+    Parameters
+    ----------
+    inp
+        Array of floats of size (m, ..., n), containing m inputs of
+        size (..., n).
+    norm : {None, 'ortho', 'no_norm'}
+        Normalization of transform. Following numpy, default *None* normalizes
+        only the inverse transform by n, 'ortho' yields the unitary transform
+        (:math:`1/\sqrt n` forward and inverse). In addition, 'no_norm' leaves
+        the transform unnormalized.
+    """
+    s = inp.shape[1:]
+    cond_norm = _unitary(norm)
+    scaling = 1
+    if cond_norm == "ortho":
+        scaling = T.sqrt(s.prod().astype(inp.dtype))
+    return rfft_op(inp, s) / scaling
+def irfft(inp, norm=None, is_odd=False):
+    """
+    Performs the inverse fast Fourier Transform with real-valued output.
+    The input is a variable of dimensions (m, ..., n//2+1, 2)
+    representing the non-trivial elements of m real-valued Fourier transforms
+    of initial size (..., n). The real and imaginary parts are stored as a
+    pair of float arrays.
+    The output is a real-valued variable of dimensions (m, ..., n)
+    giving the m inverse FFTs.
+    Parameters
+    ----------
+    inp
+        Array of size (m, ..., n//2+1, 2), containing m inputs
+        with n//2+1 non-trivial elements on the last dimension and real
+        and imaginary parts stored as separate real arrays.
+    norm : {None, 'ortho', 'no_norm'}
+        Normalization of transform. Following numpy, default *None* normalizes
+        only the inverse transform by n, 'ortho' yields the unitary transform
+        (:math:`1/\sqrt n` forward and inverse). In addition, 'no_norm' leaves
+        the transform unnormalized.
+    is_odd : {True, False}
+        Set to True to get a real inverse transform output with an odd last dimension
+        of length (N-1)*2 + 1 for an input last dimension of length N.
+    """
+    if is_odd not in (True, False):
+        raise ValueError("Invalid value %s for id_odd, must be True or False" % is_odd)
+    s = inp.shape[1:-1]
+    if is_odd:
+        s = T.set_subtensor(s[-1], (s[-1] - 1) * 2 + 1)
+    else:
+        s = T.set_subtensor(s[-1], (s[-1] - 1) * 2)
+    cond_norm = _unitary(norm)
+    scaling = 1
+    # Numpy's default normalization is 1/N on the inverse transform.
+    if cond_norm is None:
+        scaling = s.prod().astype(inp.dtype)
+    elif cond_norm == "ortho":
+        scaling = T.sqrt(s.prod().astype(inp.dtype))
+    return irfft_op(inp, s) / scaling
+def _unitary(norm):
+    if norm not in (None, "ortho", "no_norm"):
+        raise ValueError("Invalid value %s for norm, must be None, 'ortho' or "
+                         "'no norm'" % norm)
+    return norm
--- a/theano/tensor/fourier.py
+++ b/theano/tensor/fourier.py
@@ -6,6 +6,10 @@ from theano import gof, tensor
 class Fourier(gof.Op):
    """
+    WARNING: for officially supported FFTs, use theano.tensor.fft, which
+    provides real-input FFTs. Gradients are supported, as well as optimization
+    transfers to GPU ops.
    An instance of this class returns a finite fourier transform calcutated
    along one dimension of an input array.

--- a/theano/tensor/tests/test_fft.py
+++ b/theano/tensor/tests/test_fft.py
+from __future__ import absolute_import, print_function, division
+import numpy
+import unittest
+import theano
+from theano import tensor as T
+from theano.tests import unittest_tools as utt
+from theano.tensor import fft
+N = 16
+class TestFFT(unittest.TestCase):
+    def test_rfft_float(self):
+        # Test that numpy's default float64 output is cast to theano input type
+        eps = 1e-1
+        def f_rfft(inp):
+            return fft.rfft(inp)
+        inputs_val = numpy.random.random((1, N)).astype(theano.config.floatX)
+        utt.verify_grad(f_rfft, [inputs_val], eps=eps)
+        def f_irfft(inp):
+            return fft.irfft(inp)
+        inputs_val = numpy.random.random((1, N // 2 + 1, 2)).astype(theano.config.floatX)
+        utt.verify_grad(f_irfft, [inputs_val], eps=eps)
+    def test_1Drfft(self):
+        inputs_val = numpy.random.random((1, N)).astype(theano.config.floatX)
+        x = T.matrix('x')
+        rfft = fft.rfft(x)
+        f_rfft = theano.function([x], rfft)
+        res_rfft = f_rfft(inputs_val)
+        res_rfft_comp = (numpy.asarray(res_rfft[:, :, 0]) +
+                         1j * numpy.asarray(res_rfft[:, :, 1]))
+        rfft_ref = numpy.fft.rfft(inputs_val, axis=1)
+        utt.assert_allclose(rfft_ref, res_rfft_comp)
+        m = rfft.type()
+        print(m.ndim)
+        irfft = fft.irfft(m)
+        f_irfft = theano.function([m], irfft)
+        res_irfft = f_irfft(res_rfft)
+        utt.assert_allclose(inputs_val, numpy.asarray(res_irfft))
+        # The numerical gradient of the FFT is sensitive, must set large
+        # enough epsilon to get good accuracy.
+        eps = 1e-1
+        def f_rfft(inp):
+            return fft.rfft(inp)
+        inputs_val = numpy.random.random((1, N)).astype(theano.config.floatX)
+        utt.verify_grad(f_rfft, [inputs_val], eps=eps)
+        def f_irfft(inp):
+            return fft.irfft(inp)
+        inputs_val = numpy.random.random((1, N // 2 + 1, 2)).astype(theano.config.floatX)
+        utt.verify_grad(f_irfft, [inputs_val], eps=eps)
+    def test_rfft(self):
+        inputs_val = numpy.random.random((1, N, N)).astype(theano.config.floatX)
+        inputs = theano.shared(inputs_val)
+        rfft = fft.rfft(inputs)
+        f_rfft = theano.function([], rfft)
+        res_rfft = f_rfft()
+        res_rfft_comp = (numpy.asarray(res_rfft[:, :, :, 0]) +
+                         1j * numpy.asarray(res_rfft[:, :, :, 1]))
+        rfft_ref = numpy.fft.rfftn(inputs_val, axes=(1, 2))
+        utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4)
+    def test_irfft(self):
+        inputs_val = numpy.random.random((1, N, N)).astype(theano.config.floatX)
+        inputs = theano.shared(inputs_val)
+        rfft = fft.rfft(inputs)
+        f_rfft = theano.function([], rfft)
+        res_fft = f_rfft()
+        m = rfft.type()
+        irfft = fft.irfft(m)
+        f_irfft = theano.function([m], irfft)
+        res_irfft = f_irfft(res_fft)
+        utt.assert_allclose(inputs_val, numpy.asarray(res_irfft))
+        inputs_val = numpy.random.random((1, N, N, 2)).astype(theano.config.floatX)
+        inputs = theano.shared(inputs_val)
+        irfft = fft.irfft(inputs)
+        f_irfft = theano.function([], irfft)
+        res_irfft = f_irfft()
+        inputs_ref = inputs_val[..., 0] + inputs_val[..., 1] * 1j
+        irfft_ref = numpy.fft.irfftn(inputs_ref, axes=(1, 2))
+        utt.assert_allclose(irfft_ref, res_irfft, atol=1e-4, rtol=1e-4)
+    def test_norm_rfft(self):
+        inputs_val = numpy.random.random((1, N, N)).astype(theano.config.floatX)
+        inputs = theano.shared(inputs_val)
+        # Unitary normalization
+        rfft = fft.rfft(inputs, norm='ortho')
+        f_rfft = theano.function([], rfft)
+        res_rfft = f_rfft()
+        res_rfft_comp = (numpy.asarray(res_rfft[:, :, :, 0]) +
+                         1j * numpy.asarray(res_rfft[:, :, :, 1]))
+        rfft_ref = numpy.fft.rfftn(inputs_val, axes=(1, 2))
+        utt.assert_allclose(rfft_ref / N, res_rfft_comp, atol=1e-4, rtol=1e-4)
+        # No normalization
+        rfft = fft.rfft(inputs, norm='no_norm')
+        f_rfft = theano.function([], rfft)
+        res_rfft = f_rfft()
+        res_rfft_comp = (numpy.asarray(res_rfft[:, :, :, 0]) +
+                         1j * numpy.asarray(res_rfft[:, :, :, 1]))
+        utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4)
+        # Inverse FFT inputs
+        inputs_val = numpy.random.random((1, N, N // 2 + 1, 2)).astype(theano.config.floatX)
+        inputs = theano.shared(inputs_val)
+        inputs_ref = inputs_val[..., 0] + 1j * inputs_val[..., 1]
+        # Unitary normalization inverse FFT
+        irfft = fft.irfft(inputs, norm='ortho')
+        f_irfft = theano.function([], irfft)
+        res_irfft = f_irfft()
+        irfft_ref = numpy.fft.irfftn(inputs_ref, axes=(1, 2))
+        utt.assert_allclose(irfft_ref * N, res_irfft, atol=1e-4, rtol=1e-4)
+        # No normalization inverse FFT
+        irfft = fft.irfft(inputs, norm='no_norm')
+        f_irfft = theano.function([], irfft)
+        res_irfft = f_irfft()
+        utt.assert_allclose(irfft_ref * N**2, res_irfft, atol=1e-4, rtol=1e-4)
+    def test_params(self):
+        inputs_val = numpy.random.random((1, N)).astype(theano.config.floatX)
+        inputs = theano.shared(inputs_val)
+        self.assertRaises(ValueError, fft.rfft, inputs, norm=123)
+        inputs_val = numpy.random.random((1, N // 2 + 1, 2)).astype(theano.config.floatX)
+        inputs = theano.shared(inputs_val)
+        self.assertRaises(ValueError, fft.irfft, inputs, norm=123)
+        self.assertRaises(ValueError, fft.irfft, inputs, is_odd=123)
+    def test_grad_rfft(self):
+        # The numerical gradient of the FFT is sensitive, must set large
+        # enough epsilon to get good accuracy.
+        eps = 1e-1
+        def f_rfft(inp):
+            return fft.rfft(inp)
+        inputs_val = numpy.random.random((1, N, N)).astype(theano.config.floatX)
+        utt.verify_grad(f_rfft, [inputs_val], eps=eps)
+        def f_irfft(inp):
+            return fft.irfft(inp)
+        inputs_val = numpy.random.random((1, N, N // 2 + 1, 2)).astype(theano.config.floatX)
+        utt.verify_grad(f_irfft, [inputs_val], eps=eps)
+        def f_rfft(inp):
+            return fft.rfft(inp, norm='ortho')
+        inputs_val = numpy.random.random((1, N, N)).astype(theano.config.floatX)
+        utt.verify_grad(f_rfft, [inputs_val], eps=eps)
+        def f_irfft(inp):
+            return fft.irfft(inp, norm='no_norm')
+        inputs_val = numpy.random.random((1, N, N // 2 + 1, 2)).astype(theano.config.floatX)
+        utt.verify_grad(f_irfft, [inputs_val], eps=eps)