Implemented numpy fft op for CPU and added op_lifter for gpu transfer

178fcbd5 · slefrancois · efb9499f · 178fcbd5 · 178fcbd5 · 178fcbd5
--- a/doc/library/gpuarray/fft.txt
+++ b/doc/library/gpuarray/fft.txt
@@ -10,7 +10,7 @@ FFT gradients are implemented as the opposite Fourier transform of the output gr
 .. warning ::
    The real and imaginary parts of the Fourier domain arrays are stored as a pair of float32
-    array, emulating complex64. Since theano has limited support for complex
+    arrays, emulating complex64. Since theano has limited support for complex
    number operations, care must be taken to manually implement operations such as gradients.
 .. automodule:: theano.gpuarray.fft
@@ -33,7 +33,7 @@ shifted to the middle of the array. The Theano flag ``device=cuda{0,1...}`` must
    f_rfft = theano.function([x], rfft)
    N = 1024
-    box = np.zeros((1,N), dtype='float32')
+    box = np.zeros((1, N), dtype='float32')
    box[:, N/2-10: N/2+10] = 1
    out = f_rfft(box)

--- a/doc/library/tensor/fft.txt
+++ b/doc/library/tensor/fft.txt
+.. _libdoc_tensor_fft:
+==============================================
+:mod:`tensor.fft` -- Fast Fourier Transforms
+==============================================
+Performs Fast Fourier Transforms (FFT).
+FFT gradients are implemented as the opposite Fourier transform of the output gradients.
+.. warning ::
+    The real and imaginary parts of the Fourier domain arrays are stored as a pair of float
+    arrays, emulating complex. Since theano has limited support for complex
+    number operations, care must be taken to manually implement operations such as gradients.
+.. automodule:: theano.tensor.fft
+   :members: rfft, irfft
+For example, the code below performs the real input FFT of a box function,
+which is a sinc function. The absolute value is plotted, since the phase
+oscillates due to the box function being shifted to the middle of the array.
+.. testcode::
+    import numpy as np
+    import theano
+    import theano.tensor as T
+    from theano.tensor import fft
+    x = T.matrix('x', dtype='float64')
+    rfft = fft.rfft(x, norm='ortho')
+    f_rfft = theano.function([x], rfft)
+    N = 1024
+    box = np.zeros((1, N), dtype='float64')
+    box[:, N/2-10: N/2+10] = 1
+    out = f_rfft(box)
+    c_out = np.asarray(out[0, :, 0] + 1j*out[0, :, 1])
+    abs_out = abs(c_out)
+.. image:: plot_fft.png
--- a/doc/library/tensor/index.txt
+++ b/doc/library/tensor/index.txt
@@ -29,3 +29,4 @@ They are grouped into the following sections:
    opt
    slinalg
    nlinalg
+    fft
--- a/doc/library/tensor/plot_fft.png
+++ b/doc/library/tensor/plot_fft.png
--- a/theano/gpuarray/__init__.py
+++ b/theano/gpuarray/__init__.py
@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
                   GpuArraySharedVariable, gpuarray_shared_constructor,
                   reg_context, get_context, ContextNotDefined)
 from .basic_ops import as_gpuarray_variable
-from . import dnn, opt, nerv, extra_ops
+from . import fft, dnn, opt, nerv, extra_ops
 def transfer(x, target):
    try:

--- a/theano/gpuarray/fft.py
+++ b/theano/gpuarray/fft.py
@@ -8,6 +8,9 @@ from theano.gradient import DisconnectedType
 from theano.gpuarray import (basic_ops, GpuArrayType)
+import theano.tensor.fft
+from .opt import register_opt, op_lifter
 try:
    import pygpu
    pygpu_available = True
@@ -21,8 +24,8 @@ except ImportError:
    pycuda_available = False
 try:
-    import scikits.cuda
+    import skcuda
-    from scikits.cuda import fft
+    from skcuda import fft
    scikits_cuda_available = True
 except (ImportError, Exception):
    scikits_cuda_available = False
@@ -47,7 +50,7 @@ class CuRFFTOp(Op):
        # The effect of padding on gradients has yet to be investigated.
        if not scikits_cuda_available:
-            raise RuntimeError("scikits.cuda is needed for CuFFTOp")
+            raise RuntimeError("skcuda is needed for CuFFTOp")
        if not pygpu_available:
            raise RuntimeError("pygpu is needed for CuFFTOp")
@@ -77,7 +80,7 @@ class CuRFFTOp(Op):
        # Initiliaze cuda context to the input's.
        with node.inputs[0].type.context:
-            scikits.cuda.misc.init()
+            skcuda.misc.init()
        plan_input_shape = [None]
        plan = [None]
@@ -108,7 +111,7 @@ class CuRFFTOp(Op):
            input_pycuda = inputs[0][0]
            # I thought we'd need to change the type on output_pycuda
-            # so it is complex64, but as it turns out scikits.cuda.fft
+            # so it is complex64, but as it turns out skcuda.fft
            # doesn't really care either way and treats the array as
            # if it is complex64 anyway.
            output_pycuda = z[0]
@@ -172,7 +175,7 @@ class CuIRFFTOp(Op):
        # The effect of padding on gradients has yet to be investigated.
        if not scikits_cuda_available:
-            raise RuntimeError("scikits.cuda is needed for CuIFFTOp")
+            raise RuntimeError("skcuda is needed for CuIFFTOp")
        if not pygpu_available:
            raise RuntimeError("pygpu is needed for CuIFFTOp")
@@ -202,7 +205,7 @@ class CuIRFFTOp(Op):
        # Initiliaze cuda context to the input's.
        with node.inputs[0].type.context:
-            scikits.cuda.misc.init()
+            skcuda.misc.init()
        plan_input_shape = [None]
        plan = [None]
@@ -231,7 +234,7 @@ class CuIRFFTOp(Op):
            input_pycuda = inputs[0][0]
            # input_pycuda is a float32 array with an extra dimension,
-            # but will be interpreted by scikits.cuda as a complex64
+            # but will be interpreted by skcuda as a complex64
            # array instead.
            output_pycuda = z[0]
@@ -366,3 +369,15 @@ def _unitary(norm):
        raise ValueError("Invalid value %s for norm, must be None, 'ortho' or "
                         "'no norm'" % norm)
    return norm
+if scikits_cuda_available:
+    @register_opt('fast_compile')
+    @op_lifter([theano.tensor.fft.RFFTOp])
+    def local_curfft_op(node, context_name):
+        return curfft_op
+    @register_opt('fast_compile')
+    @op_lifter([theano.tensor.fft.IRFFTOp])
+    def local_cuirfft_op(node, context_name):
+        return cuirfft_op
--- a/theano/gpuarray/tests/test_fft.py
+++ b/theano/gpuarray/tests/test_fft.py
@@ -9,6 +9,8 @@ from theano.tests import unittest_tools as utt
 import theano.gpuarray.fft
 import numpy.fft
+from .config import mode_with_gpu
 # Skip tests if pygpu is not available.
 from nose.plugins.skip import SkipTest
 from theano.gpuarray.fft import pygpu_available, scikits_cuda_available, pycuda_available
@@ -30,7 +32,7 @@ class TestFFT(unittest.TestCase):
        x = T.matrix('x', dtype='float32')
        rfft = theano.gpuarray.fft.curfft(x)
-        f_rfft = theano.function([x], rfft)
+        f_rfft = theano.function([x], rfft, mode=mode_with_gpu)
        res_rfft = f_rfft(inputs_val)
        res_rfft_comp = (np.asarray(res_rfft[:, :, 0]) +
                         1j * np.asarray(res_rfft[:, :, 1]))
@@ -41,7 +43,7 @@ class TestFFT(unittest.TestCase):
        m = rfft.type()
        irfft = theano.gpuarray.fft.cuirfft(m)
-        f_irfft = theano.function([m], irfft)
+        f_irfft = theano.function([m], irfft, mode=mode_with_gpu)
        res_irfft = f_irfft(res_rfft)
        utt.assert_allclose(inputs_val, np.asarray(res_irfft))
@@ -65,7 +67,7 @@ class TestFFT(unittest.TestCase):
        inputs = theano.shared(inputs_val)
        rfft = theano.gpuarray.fft.curfft(inputs)
-        f_rfft = theano.function([], rfft)
+        f_rfft = theano.function([], rfft, mode=mode_with_gpu)
        res_rfft = f_rfft()
        res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
                         1j * np.asarray(res_rfft[:, :, :, 1]))
@@ -79,16 +81,28 @@ class TestFFT(unittest.TestCase):
        inputs = theano.shared(inputs_val)
        fft = theano.gpuarray.fft.curfft(inputs)
-        f_fft = theano.function([], fft)
+        f_fft = theano.function([], fft, mode=mode_with_gpu)
        res_fft = f_fft()
        m = fft.type()
        ifft = theano.gpuarray.fft.cuirfft(m)
-        f_ifft = theano.function([m], ifft)
+        f_ifft = theano.function([m], ifft, mode=mode_with_gpu)
        res_ifft = f_ifft(res_fft)
        utt.assert_allclose(inputs_val, np.asarray(res_ifft))
+        inputs_val = numpy.random.random((1, N, N, 2)).astype('float32')
+        inputs = theano.shared(inputs_val)
+        irfft = theano.gpuarray.fft.cuirfft(inputs)
+        f_irfft = theano.function([], irfft)
+        res_irfft = f_irfft()
+        inputs_ref = inputs_val[..., 0] + inputs_val[..., 1] * 1j
+        irfft_ref = np.fft.irfftn(inputs_ref, axes=(1, 2))
+        utt.assert_allclose(irfft_ref, res_irfft, atol=1e-4, rtol=1e-4)
    def test_type(self):
        inputs_val = np.random.random((1, N)).astype('float64')
        inputs = theano.shared(inputs_val)
@@ -104,7 +118,7 @@ class TestFFT(unittest.TestCase):
        # Unitary normalization
        rfft = theano.gpuarray.fft.curfft(inputs, norm='ortho')
-        f_rfft = theano.function([], rfft)
+        f_rfft = theano.function([], rfft, mode=mode_with_gpu)
        res_rfft = f_rfft()
        res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
                         1j * np.asarray(res_rfft[:, :, :, 1]))
@@ -116,7 +130,7 @@ class TestFFT(unittest.TestCase):
        # No normalization
        rfft = theano.gpuarray.fft.curfft(inputs, norm='no_norm')
-        f_rfft = theano.function([], rfft)
+        f_rfft = theano.function([], rfft, mode=mode_with_gpu)
        res_rfft = f_rfft()
        res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
                         1j * np.asarray(res_rfft[:, :, :, 1]))
@@ -131,7 +145,7 @@ class TestFFT(unittest.TestCase):
        # Unitary normalization inverse FFT
        irfft = theano.gpuarray.fft.cuirfft(inputs, norm='ortho')
-        f_irfft = theano.function([], irfft)
+        f_irfft = theano.function([], irfft, mode=mode_with_gpu)
        res_irfft = f_irfft()
        irfft_ref_ortho = numpy.fft.irfftn(
@@ -142,7 +156,7 @@ class TestFFT(unittest.TestCase):
        # No normalization inverse FFT
        irfft = theano.gpuarray.fft.cuirfft(inputs, norm='no_norm')
-        f_irfft = theano.function([], irfft)
+        f_irfft = theano.function([], irfft, mode=mode_with_gpu)
        res_irfft = f_irfft()
        utt.assert_allclose(irfft_ref_ortho * np.sqrt(N * N),
@@ -180,7 +194,7 @@ class TestFFT(unittest.TestCase):
        inputs = theano.shared(inputs_val)
        rfft = theano.gpuarray.fft.curfft(inputs)
-        f_rfft = theano.function([], rfft)
+        f_rfft = theano.function([], rfft, mode=mode_with_gpu)
        res_rfft = f_rfft()
        res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
@@ -192,7 +206,7 @@ class TestFFT(unittest.TestCase):
        m = rfft.type()
        ifft = theano.gpuarray.fft.cuirfft(m, is_odd=True)
-        f_ifft = theano.function([m], ifft)
+        f_ifft = theano.function([m], ifft, mode=mode_with_gpu)
        res_ifft = f_ifft(res_rfft)
        utt.assert_allclose(inputs_val, np.asarray(res_ifft))
@@ -201,7 +215,7 @@ class TestFFT(unittest.TestCase):
        inputs = theano.shared(inputs_val)
        irfft = theano.gpuarray.fft.cuirfft(inputs, norm='ortho', is_odd=True)
-        f_irfft = theano.function([], irfft)
+        f_irfft = theano.function([], irfft, mode=mode_with_gpu)
        res_irfft = f_irfft()
        inputs_ref = inputs_val[:, :, :, 0] + 1j * inputs_val[:, :, :, 1]
@@ -246,3 +260,4 @@ class TestFFT(unittest.TestCase):
            theano.gpuarray.fft.cuirfft(inputs, norm=123)
        with self.assertRaises(ValueError):
            theano.gpuarray.fft.cuirfft(inputs, is_odd=123)
--- a/theano/sandbox/fourier.py
+++ b/theano/sandbox/fourier.py
@@ -12,6 +12,13 @@ from six.moves import xrange
 from theano import tensor
 from theano.gof import Op, Apply, generic
+# This module will soon be deprecated.
+import warnings
+message = ("The module theano.sandbox.fourier will soon be deprecated."
+           " Please use theano.tensor.fft, which supports gradients and "
+           "automatic optimization transfers to the GPU ops.")
+warnings.warn(message)
 class GradTodo(Op):
    # TODO : need description for class

--- a/theano/tensor/fft.py
+++ b/theano/tensor/fft.py
+from __future__ import absolute_import, print_function, division
+import numpy as np
+from theano import gof
+import theano.tensor as T
+from theano.gradient import DisconnectedType
+class RFFTOp(gof.Op):
+    __props__ = ()
+    def output_type(self, inp):
+        # add extra dim for real/imag
+        return T.TensorType(inp.dtype,
+                            broadcastable=[False] * (inp.type.ndim + 1))
+    def make_node(self, a, s=None):
+        a = T.as_tensor_variable(a)
+        if a.ndim < 2:
+            raise TypeError('%s: input must have dimension > 2, with first dimension batches' %
+                            self.__class__.__name__)
+        if s is None:
+            s = a.shape[1:]
+            s = T.as_tensor_variable(s)
+        else:
+            s = T.as_tensor_variable(s)
+            if (not s.dtype.startswith('int')) and \
+               (not s.dtype.startswith('uint')):
+                raise TypeError('%s: length of the transformed axis must be'
+                                ' of type integer' % self.__class__.__name__)
+        return gof.Apply(self, [a, s], [self.output_type(a)()])
+    def perform(self, node, inputs, output_storage):
+        a = inputs[0]
+        s = inputs[1]
+        A = np.fft.rfftn(a, s=tuple(s))
+        # Format output with two extra dimensions for real and imaginary
+        # parts.
+        out = np.zeros(A.shape + (2,), dtype=a.dtype)
+        out[..., 0], out[..., 1] = np.real(A), np.imag(A)
+        output_storage[0][0] = out
+    def grad(self, inputs, output_grads):
+        gout, = output_grads
+        s = inputs[1]
+        # Divide the last dimension of the output gradients by 2, they are
+        # double-counted by the real-IFFT due to symmetry, except the first
+        # and last elements (for even transforms) which are unique.
+        idx = [slice(None)] * (gout.ndim - 2) \
+            + [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(None)]
+        gout = T.set_subtensor(gout[idx], gout[idx] * 0.5)
+        return [irfft_op(gout, s), DisconnectedType()()]
+    def connection_pattern(self, node):
+        # Specificy that shape input parameter has no connection to graph and gradients.
+        return [[True], [False]]
+rfft_op = RFFTOp()
+class IRFFTOp(gof.Op):
+    __props__ = ()
+    def output_type(self, inp):
+        # remove extra dim for real/imag
+        return T.TensorType(inp.dtype,
+                            broadcastable=[False] * (inp.type.ndim - 1))
+    def make_node(self, a, s=None):
+        a = T.as_tensor_variable(a)
+        if a.ndim < 3:
+            raise TypeError('%s: input must have dimension >= 3,  with ' %
+                            self.__class__.__name__ +
+                            'first dimension batches and last real/imag parts')
+        if s is None:
+            s = a.shape[1:-1]
+            s = T.set_subtensor(s[-1], (s[-1] - 1) * 2)
+            s = T.as_tensor_variable(s)
+        else:
+            s = T.as_tensor_variable(s)
+            if (not s.dtype.startswith('int')) and \
+               (not s.dtype.startswith('uint')):
+                raise TypeError('%s: length of the transformed axis must be'
+                                ' of type integer' % self.__class__.__name__)
+        return gof.Apply(self, [a, s], [self.output_type(a)()])
+    def perform(self, node, inputs, output_storage):
+        a = inputs[0]
+        s = inputs[1]
+        # Reconstruct complex array from two float dimensions
+        inp = a[..., 0] + 1j * a[..., 1]
+        out = np.fft.irfftn(inp, s=tuple(s))
+        # Remove numpy's default normalization
+        # Cast to input type (numpy outputs float64 by default)
+        output_storage[0][0] = (out * s.prod()).astype(a.dtype)
+    def grad(self, inputs, output_grads):
+        gout, = output_grads
+        s = inputs[1]
+        gf = rfft_op(gout, s)
+        # Multiply the last dimension of the gradient by 2, they represent
+        # both positive and negative frequencies, except the first
+        # and last elements (for even transforms) which are unique.
+        idx = [slice(None)] * (gf.ndim - 2) \
+            + [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(None)]
+        gf = T.set_subtensor(gf[idx], gf[idx] * 2)
+        return [gf, DisconnectedType()()]
+    def connection_pattern(self, node):
+        # Specificy that shape input parameter has no connection to graph and gradients.
+        return [[True], [False]]
+irfft_op = IRFFTOp()
+def rfft(inp, norm=None):
+    """
+    Performs the fast Fourier transform of a real-valued input.
+    The input must be a real-valued variable of dimensions (m, ..., n).
+    It performs FFTs of size (..., n) on m batches.
+    The output is a tensor of dimensions (m, ..., n//2+1, 2). The second to
+    last dimension of the output contains the n//2+1 non-trivial elements of
+    the real-valued FFTs. The real and imaginary parts are stored as a pair of
+    float arrays.
+    Parameters
+    ----------
+    inp
+        Array of floats of size (m, ..., n), containing m inputs of
+        size (..., n).
+    norm : {None, 'ortho', 'no_norm'}
+        Normalization of transform. Following numpy, default *None* normalizes
+        only the inverse transform by n, 'ortho' yields the unitary transform
+        (:math:`1/\sqrt n` forward and inverse). In addition, 'no_norm' leaves
+        the transform unnormalized.
+    """
+    s = inp.shape[1:]
+    cond_norm = _unitary(norm)
+    scaling = 1
+    if cond_norm == "ortho":
+        scaling = T.sqrt(s.prod().astype(inp.dtype))
+    return rfft_op(inp, s) / scaling
+def irfft(inp, norm=None, is_odd=False):
+    """
+    Performs the inverse fast Fourier Transform with real-valued output.
+    The input is a variable of dimensions (m, ..., n//2+1, 2)
+    representing the non-trivial elements of m real-valued Fourier transforms
+    of initial size (..., n). The real and imaginary parts are stored as a
+    pair of float arrays.
+    The output is a real-valued variable of dimensions (m, ..., n)
+    giving the m inverse FFTs.
+    Parameters
+    ----------
+    inp
+        Array of size (m, ..., n//2+1, 2), containing m inputs
+        with n//2+1 non-trivial elements on the last dimension and real
+        and imaginary parts stored as separate real arrays.
+    norm : {None, 'ortho', 'no_norm'}
+        Normalization of transform. Following numpy, default *None* normalizes
+        only the inverse transform by n, 'ortho' yields the unitary transform
+        (:math:`1/\sqrt n` forward and inverse). In addition, 'no_norm' leaves
+        the transform unnormalized.
+    is_odd : {True, False}
+        Set to True to get a real inverse transform output with an odd last dimension
+        of length (N-1)*2 + 1 for an input last dimension of length N.
+    """
+    if is_odd not in (True, False):
+        raise ValueError("Invalid value %s for id_odd, must be True or False" % is_odd)
+    s = inp.shape[1:-1]
+    if is_odd:
+        s = T.set_subtensor(s[-1], (s[-1] - 1) * 2 + 1)
+    else:
+        s = T.set_subtensor(s[-1], (s[-1] - 1) * 2)
+    cond_norm = _unitary(norm)
+    scaling = 1
+    # Numpy's default normalization is 1/N on the inverse transform.
+    if cond_norm is None:
+        scaling = s.prod().astype(inp.dtype)
+    elif cond_norm == "ortho":
+        scaling = T.sqrt(s.prod().astype(inp.dtype))
+    return irfft_op(inp, s) / scaling
+def _unitary(norm):
+    if norm not in (None, "ortho", "no_norm"):
+        raise ValueError("Invalid value %s for norm, must be None, 'ortho' or "
+                         "'no norm'" % norm)
+    return norm
--- a/theano/tensor/fourier.py
+++ b/theano/tensor/fourier.py
@@ -6,6 +6,10 @@ from theano import gof, tensor
 class Fourier(gof.Op):
    """
+    WARNING: for officially supported FFTs, use theano.tensor.fft, which
+    provides real-input FFTs. Gradients are supported, as well as optimization
+    transfers to GPU ops.
    An instance of this class returns a finite fourier transform calcutated
    along one dimension of an input array.
@@ -141,3 +145,4 @@ class Fourier(gof.Op):
 fft = Fourier()
--- a/theano/tensor/tests/test_fft.py
+++ b/theano/tensor/tests/test_fft.py
+from __future__ import absolute_import, print_function, division
+import numpy
+import unittest
+import theano
+from theano import tensor as T
+from theano.tests import unittest_tools as utt
+from theano.tensor import fft
+N = 16
+class TestFFT(unittest.TestCase):
+    def test_rfft_float(self):
+        # Test that numpy's default float64 output is cast to theano input type
+        eps = 1e-1
+        def f_rfft(inp):
+            return fft.rfft(inp)
+        inputs_val = numpy.random.random((1, N)).astype('float32')
+        utt.verify_grad(f_rfft, [inputs_val], eps=eps)
+        def f_irfft(inp):
+            return fft.irfft(inp)
+        inputs_val = numpy.random.random((1, N // 2 + 1, 2)).astype('float32')
+        utt.verify_grad(f_irfft, [inputs_val], eps=eps)
+    def test_1Drfft(self):
+        inputs_val = numpy.random.random((1, N))
+        x = T.matrix('x')
+        rfft = fft.rfft(x)
+        f_rfft = theano.function([x], rfft)
+        res_rfft = f_rfft(inputs_val)
+        res_rfft_comp = (numpy.asarray(res_rfft[:, :, 0]) +
+                         1j * numpy.asarray(res_rfft[:, :, 1]))
+        rfft_ref = numpy.fft.rfft(inputs_val, axis=1)
+        utt.assert_allclose(rfft_ref, res_rfft_comp)
+        m = rfft.type()
+        print(m.ndim)
+        irfft = fft.irfft(m)
+        f_irfft = theano.function([m], irfft)
+        res_irfft = f_irfft(res_rfft)
+        utt.assert_allclose(inputs_val, numpy.asarray(res_irfft))
+        # The numerical gradient of the FFT is sensitive, must set large
+        # enough epsilon to get good accuracy.
+        eps = 1e-1
+        def f_rfft(inp):
+            return fft.rfft(inp)
+        inputs_val = numpy.random.random((1, N))
+        utt.verify_grad(f_rfft, [inputs_val], eps=eps)
+        def f_irfft(inp):
+            return fft.irfft(inp)
+        inputs_val = numpy.random.random((1, N // 2 + 1, 2))
+        utt.verify_grad(f_irfft, [inputs_val], eps=eps)
+    def test_rfft(self):
+        inputs_val = numpy.random.random((1, N, N))
+        inputs = theano.shared(inputs_val)
+        rfft = fft.rfft(inputs)
+        f_rfft = theano.function([], rfft)
+        res_rfft = f_rfft()
+        res_rfft_comp = (numpy.asarray(res_rfft[:, :, :, 0]) +
+                         1j * numpy.asarray(res_rfft[:, :, :, 1]))
+        rfft_ref = numpy.fft.rfftn(inputs_val, axes=(1, 2))
+        utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4)
+    def test_irfft(self):
+        inputs_val = numpy.random.random((1, N, N))
+        inputs = theano.shared(inputs_val)
+        rfft = fft.rfft(inputs)
+        f_rfft = theano.function([], rfft)
+        res_fft = f_rfft()
+        m = rfft.type()
+        irfft = fft.irfft(m)
+        f_irfft = theano.function([m], irfft)
+        res_irfft = f_irfft(res_fft)
+        utt.assert_allclose(inputs_val, numpy.asarray(res_irfft))
+        inputs_val = numpy.random.random((1, N, N, 2))
+        inputs = theano.shared(inputs_val)
+        irfft = fft.irfft(inputs)
+        f_irfft = theano.function([], irfft)
+        res_irfft = f_irfft()
+        inputs_ref = inputs_val[..., 0] + inputs_val[..., 1] * 1j
+        irfft_ref = numpy.fft.irfftn(inputs_ref, axes=(1, 2))
+        utt.assert_allclose(irfft_ref, res_irfft, atol=1e-4, rtol=1e-4)
+    def test_norm_rfft(self):
+        inputs_val = numpy.random.random((1, N, N))
+        inputs = theano.shared(inputs_val)
+        # Unitary normalization
+        rfft = fft.rfft(inputs, norm='ortho')
+        f_rfft = theano.function([], rfft)
+        res_rfft = f_rfft()
+        res_rfft_comp = (numpy.asarray(res_rfft[:, :, :, 0]) +
+                         1j * numpy.asarray(res_rfft[:, :, :, 1]))
+        rfft_ref_ortho = numpy.fft.rfftn(inputs_val, axes=(1, 2), norm='ortho')
+        utt.assert_allclose(rfft_ref_ortho, res_rfft_comp,
+                            atol=1e-4, rtol=1e-4)
+        # No normalization
+        rfft = fft.rfft(inputs, norm='no_norm')
+        f_rfft = theano.function([], rfft)
+        res_rfft = f_rfft()
+        res_rfft_comp = (numpy.asarray(res_rfft[:, :, :, 0]) +
+                         1j * numpy.asarray(res_rfft[:, :, :, 1]))
+        utt.assert_allclose(rfft_ref_ortho * numpy.sqrt(N * N),
+                            res_rfft_comp, atol=1e-4, rtol=1e-4)
+        # Inverse FFT inputs
+        inputs_val = numpy.random.random((1, N, N // 2 + 1, 2))
+        inputs = theano.shared(inputs_val)
+        inputs_ref = inputs_val[..., 0] + 1j * inputs_val[..., 1]
+        # Unitary normalization inverse FFT
+        irfft = fft.irfft(inputs, norm='ortho')
+        f_irfft = theano.function([], irfft)
+        res_irfft = f_irfft()
+        irfft_ref_ortho = numpy.fft.irfftn(inputs_ref, axes=(1, 2), norm='ortho')
+        utt.assert_allclose(irfft_ref_ortho,
+                            res_irfft, atol=1e-4, rtol=1e-4)
+        # No normalization inverse FFT
+        irfft = fft.irfft(inputs, norm='no_norm')
+        f_irfft = theano.function([], irfft)
+        res_irfft = f_irfft()
+        utt.assert_allclose(irfft_ref_ortho * numpy.sqrt(N * N),
+                            res_irfft, atol=1e-4, rtol=1e-4)
+    def test_params(self):
+        inputs_val = numpy.random.random((1, N))
+        inputs = theano.shared(inputs_val)
+        with self.assertRaises(ValueError):
+            fft.rfft(inputs, norm=123)
+        inputs_val = numpy.random.random((1, N // 2 + 1, 2))
+        inputs = theano.shared(inputs_val)
+        with self.assertRaises(ValueError):
+            fft.irfft(inputs, norm=123)
+        with self.assertRaises(ValueError):
+            fft.irfft(inputs, is_odd=123)
+    def test_grad_rfft(self):
+        # The numerical gradient of the FFT is sensitive, must set large
+        # enough epsilon to get good accuracy.
+        eps = 1e-1
+        def f_rfft(inp):
+            return fft.rfft(inp)
+        inputs_val = numpy.random.random((1, N, N)).astype('float32')
+        utt.verify_grad(f_rfft, [inputs_val], eps=eps)
+        def f_irfft(inp):
+            return fft.irfft(inp)
+        inputs_val = numpy.random.random((1, N, N // 2 + 1, 2)).astype('float32')
+        utt.verify_grad(f_irfft, [inputs_val], eps=eps)
+        def f_rfft(inp):
+            return fft.rfft(inp, norm='ortho')
+        inputs_val = numpy.random.random((1, N, N)).astype('float32')
+        utt.verify_grad(f_rfft, [inputs_val], eps=eps)
+        def f_irfft(inp):
+            return fft.irfft(inp, norm='no_norm')
+        inputs_val = numpy.random.random((1, N, N // 2 + 1, 2)).astype('float32')
+        utt.verify_grad(f_irfft, [inputs_val], eps=eps)