gpuarray fft: guard pycuda import, add unit test, change interface to curfft

46cf0822 · slefrancois · 04183580 · 46cf0822 · 46cf0822
--- a/theano/gpuarray/fft.py
+++ b/theano/gpuarray/fft.py
@@ -3,6 +3,7 @@ from __future__ import absolute_import, print_function, division
 import numpy as np
 import theano
 from theano import Op
+import theano.tensor as T
 from theano.gpuarray import (basic_ops, GpuArrayType)
@@ -12,7 +13,11 @@ try:
 except ImportError:
    pygpu_available = False
-import pycuda.driver
+try:
+    import pycuda.driver
+    pycuda_available = True
+except ImportError:
+    pycuda_available = False
 try:
    import scikits.cuda
@@ -22,21 +27,21 @@ except (ImportError, Exception):
    scikits_cuda_available = False
-class CuFFTOp(Op):
+class CuRFFTOp(Op):
    """
-    Performs a fast Fourier transform on the GPU using the scikits CUDA FFT
+    Operator for the fast Fourier transform of a real-valued output on the GPU
-    through the gpuarray backend.
+    using the scikits CUDA FFT through the gpuarray backend.
-    The input must be a float32 variable of dimensions (m, n). It
+    The input must be a real-valued float32 variable of dimensions (m, n). It
    performs m 1-D FFTs of size n each.
    The output is a GpuArray of dimensions (m, n/2+1, 2). The output contains
-    the n/2+1 non-trivial elements of the m real-valued FFTs. The real
+    the n//2+1 non-trivial elements of the m real-valued FFTs. The real
-    and imaginary parts stored as two float32 arrays, emulating complex64.
+    and imaginary parts are stored as two float32 arrays, emulating complex64.
    Since theano does not support complex number operations, care must be
    taken to manually implement operators such as multiplication.
-    The module provides the convenience function cufft(input).
+    The module provides the convenience function curfft(input).
    """
    __props__ = ()
@@ -54,6 +59,9 @@ class CuFFTOp(Op):
        if not pygpu_available:
            raise RuntimeError("pygpu is needed for CuFFTOp")
+        if not pycuda_available:
+            raise RuntimeError("pycuda is needed for CuFFTOp")
        inp = basic_ops.gpu_contiguous(
            basic_ops.as_gpuarray_variable(inp,
                                           basic_ops.infer_context_name(inp)))
@@ -121,21 +129,11 @@ class CuFFTOp(Op):
        return thunk
-cufft = CuFFTOp()
-"""
-Convenience function for CuFFTOp.
-Parameters
----------
-input
-    Array of float32 of size (m, n), containing m inputs of length n.
-"""
-class CuIFFTOp(Op):
+class CuIRFFTOp(Op):
    """
-    Performs an inverse fast Fourier transform on the GPU using the
+    Operator for the inverse fast Fourier transform with real-valued output
-    scikits CUDA FFT through the gpuarray backend.
+    on the GPU using the scikits CUDA FFT through the gpuarray backend.
    The input is a variable of dimensions (m, n/2+1, 2) with
    type float32 representing the n/2+1 non-trivial elements of m
@@ -143,11 +141,11 @@ class CuIFFTOp(Op):
    parts are stored as two float32 arrays, emulating complex64 given that
    Theano does not support complex numbers.
-    The output is a float32 variable of dimensions (m, n) giving the m
+    The output is a real-valued float32 variable of dimensions (m, n)
-    inverse FFTs. *The output is NOT normalized*. You can manualy divide
+    giving the m inverse FFTs. *The output is NOT normalized*. You can
-    by the size of the output array to normalize.
+    manualy divide by the size of the output array to normalize.
-    The module provides the convenience function cuifft(input).
+    The module provides the convenience function cuirfft(input).
    """
    __props__ = ()
@@ -160,11 +158,14 @@ class CuIFFTOp(Op):
    def make_node(self, inp):
        if not scikits_cuda_available:
-            raise RuntimeError("scikits.cuda is needed for CuFFTOp")
+            raise RuntimeError("scikits.cuda is needed for CuIFFTOp")
        if not pygpu_available:
-            raise RuntimeError("pygpu is needed for CuFFTOp")
+            raise RuntimeError("pygpu is needed for CuIFFTOp")
-        # inp = as_gpuarray_variable(inp)
+        if not pycuda_available:
+            raise RuntimeError("pycuda is needed for CuIFFTOp")
        inp = basic_ops.gpu_contiguous(
            basic_ops.as_gpuarray_variable(inp,
                                           basic_ops.infer_context_name(inp)))
@@ -221,6 +222,10 @@ class CuIFFTOp(Op):
                output_pycuda.sync()
                fft.ifft(input_pycuda, output_pycuda, plan[0])
+                # strangely enough, enabling rescaling here makes it run
+                # very, very slowly.  so do this rescaling manually
+                # afterwards!
                # Sync results to ensure output contains completed computation
                pycuda.driver.Context.synchronize()
@@ -230,13 +235,34 @@ class CuIFFTOp(Op):
        return thunk
-cuifft = CuIFFTOp()
-"""
-Convenience function for CuIFFTOp.
-Parameters
+def curfft(inputs):
----------
+    """
-input
+    Performs the real unitary fast Fourier Transform normalized
+    by :math:`\sqrt n`.
+    Parameters
+    ----------
+    inputs
+        Array of real-valued float32 of size (m, n), containing m inputs of
+        length n.
+    """
+    fft_op = CuRFFTOp()
+    return fft_op(inputs) / T.sqrt(((inputs.shape[1:]).prod()).astype('float32'))
+def cuirfft(inputs):
+    """
+    Performs the real unitary fast inverse Fourier Transform normalized
+    by :math:`\sqrt n`.
+    Parameters
+    ----------
+    inputs
        Array of float32 of size (m, n/2+1, 2), containing m inputs with n/2+1
-    non-trivial elements and real and imaginary parts stored as separate arrays.
+        non-trivial elements and real and imaginary parts stored as separate
-"""
+        arrays.
+    """
+    ifft_op = CuIRFFTOp()
+    return ifft_op(inputs) / T.sqrt((((inputs.shape[1:-1] - 1) * 2).prod())
+                                    .astype('float32'))
--- a/theano/gpuarray/tests/test_fft.py
+++ b/theano/gpuarray/tests/test_fft.py
@@ -3,58 +3,58 @@ import unittest
 import numpy as np
 import theano
+import theano.tensor
 from theano.tests import unittest_tools as utt
+import theano.gpuarray.fft
+import numpy.fft
+from .config import mode_with_gpu
 # Skip tests if pygpu is not available.
 from nose.plugins.skip import SkipTest
 from theano.gpuarray.fft import pygpu_available, scikits_cuda_available
+from theano.gpuarray.fft import pycuda_available
 if not pygpu_available:  # noqa
    raise SkipTest('Optional package pygpu not available')
 if not scikits_cuda_available:  # noqa
    raise SkipTest('Optional package scikits.cuda not available')
+if not pycuda_available:  # noqa
-import theano.gpuarray.fft
+    raise SkipTest('Optional package pycuda not available')
-import theano.tensor.fourier
-from .config import mode_with_gpu
 class TestFFT(unittest.TestCase):
-    def test_fft(self):
+    def test_rfft(self):
        N = 64
        inputs_val = np.random.random((1, N)).astype('float32')
        inputs = theano.shared(inputs_val)
-        fft_ref = theano.tensor.fourier.fft(inputs, N, 1)
+        rfft = theano.gpuarray.fft.curfft(inputs)
-        fft = theano.gpuarray.fft.cufft(inputs)
+        f_rfft = theano.function([], rfft, mode=mode_with_gpu)
+        res_rfft = f_rfft()
-        f_ref = theano.function([], fft_ref)
+        res_rfft_comp = (np.asarray(res_rfft[:, :, 0]) +
-        f_fft = theano.function([], fft, mode=mode_with_gpu)
+                         1j * np.asarray(res_rfft[:, :, 1]))
-        res_ref = f_ref()
-        res_fft = f_fft()
-        res_fft_comp = (np.asarray(res_fft[:, :, 0]) +
+        rfft_ref = numpy.fft.rfft(inputs_val, N, 1, norm='ortho')
-                        1j * np.asarray(res_fft[:, :, 1]))
-        utt.assert_allclose(res_ref[0][0:N / 2 + 1], res_fft_comp)
+        utt.assert_allclose(rfft_ref, res_rfft_comp)
-    def test_ifft(self):
+    def test_irfft(self):
        N = 64
        inputs_val = np.random.random((1, N)).astype('float32')
        inputs = theano.shared(inputs_val)
-        fft = theano.gpuarray.fft.cufft(inputs)
+        fft = theano.gpuarray.fft.curfft(inputs)
        f_fft = theano.function([], fft, mode=mode_with_gpu)
        res_fft = f_fft()
        m = fft.type()
-        ifft = theano.gpuarray.fft.cuifft(m)
+        ifft = theano.gpuarray.fft.cuirfft(m)
        f_ifft = theano.function([m], ifft, mode=mode_with_gpu)
        res_ifft = f_ifft(res_fft)
-        utt.assert_allclose(inputs_val, np.asarray(res_ifft) / N)
+        utt.assert_allclose(inputs_val, np.asarray(res_ifft))
    def test_type(self):
        N = 64
@@ -62,6 +62,6 @@ class TestFFT(unittest.TestCase):
        inputs = theano.shared(inputs_val)
        with self.assertRaises(AssertionError):
-            theano.gpuarray.fft.cufft(inputs)
+            theano.gpuarray.fft.curfft(inputs)
        with self.assertRaises(AssertionError):
-            theano.gpuarray.fft.cuifft(inputs)
+            theano.gpuarray.fft.cuirfft(inputs)