提交 178fcbd5 authored 作者: slefrancois's avatar slefrancois

Implemented numpy fft op for CPU and added op_lifter for gpu transfer

上级 efb9499f
...@@ -10,7 +10,7 @@ FFT gradients are implemented as the opposite Fourier transform of the output gr ...@@ -10,7 +10,7 @@ FFT gradients are implemented as the opposite Fourier transform of the output gr
.. warning :: .. warning ::
The real and imaginary parts of the Fourier domain arrays are stored as a pair of float32 The real and imaginary parts of the Fourier domain arrays are stored as a pair of float32
array, emulating complex64. Since theano has limited support for complex arrays, emulating complex64. Since theano has limited support for complex
number operations, care must be taken to manually implement operations such as gradients. number operations, care must be taken to manually implement operations such as gradients.
.. automodule:: theano.gpuarray.fft .. automodule:: theano.gpuarray.fft
...@@ -33,7 +33,7 @@ shifted to the middle of the array. The Theano flag ``device=cuda{0,1...}`` must ...@@ -33,7 +33,7 @@ shifted to the middle of the array. The Theano flag ``device=cuda{0,1...}`` must
f_rfft = theano.function([x], rfft) f_rfft = theano.function([x], rfft)
N = 1024 N = 1024
box = np.zeros((1,N), dtype='float32') box = np.zeros((1, N), dtype='float32')
box[:, N/2-10: N/2+10] = 1 box[:, N/2-10: N/2+10] = 1
out = f_rfft(box) out = f_rfft(box)
......
.. _libdoc_tensor_fft:
==============================================
:mod:`tensor.fft` -- Fast Fourier Transforms
==============================================
Performs Fast Fourier Transforms (FFT).
FFT gradients are implemented as the opposite Fourier transform of the output gradients.
.. warning ::
The real and imaginary parts of the Fourier domain arrays are stored as a pair of float
arrays, emulating complex. Since theano has limited support for complex
number operations, care must be taken to manually implement operations such as gradients.
.. automodule:: theano.tensor.fft
:members: rfft, irfft
For example, the code below performs the real input FFT of a box function,
which is a sinc function. The absolute value is plotted, since the phase
oscillates due to the box function being shifted to the middle of the array.
.. testcode::
import numpy as np
import theano
import theano.tensor as T
from theano.tensor import fft
x = T.matrix('x', dtype='float64')
rfft = fft.rfft(x, norm='ortho')
f_rfft = theano.function([x], rfft)
N = 1024
box = np.zeros((1, N), dtype='float64')
box[:, N/2-10: N/2+10] = 1
out = f_rfft(box)
c_out = np.asarray(out[0, :, 0] + 1j*out[0, :, 1])
abs_out = abs(c_out)
.. image:: plot_fft.png
...@@ -29,3 +29,4 @@ They are grouped into the following sections: ...@@ -29,3 +29,4 @@ They are grouped into the following sections:
opt opt
slinalg slinalg
nlinalg nlinalg
fft
...@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant, ...@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
GpuArraySharedVariable, gpuarray_shared_constructor, GpuArraySharedVariable, gpuarray_shared_constructor,
reg_context, get_context, ContextNotDefined) reg_context, get_context, ContextNotDefined)
from .basic_ops import as_gpuarray_variable from .basic_ops import as_gpuarray_variable
from . import dnn, opt, nerv, extra_ops from . import fft, dnn, opt, nerv, extra_ops
def transfer(x, target): def transfer(x, target):
try: try:
......
...@@ -8,6 +8,9 @@ from theano.gradient import DisconnectedType ...@@ -8,6 +8,9 @@ from theano.gradient import DisconnectedType
from theano.gpuarray import (basic_ops, GpuArrayType) from theano.gpuarray import (basic_ops, GpuArrayType)
import theano.tensor.fft
from .opt import register_opt, op_lifter
try: try:
import pygpu import pygpu
pygpu_available = True pygpu_available = True
...@@ -21,8 +24,8 @@ except ImportError: ...@@ -21,8 +24,8 @@ except ImportError:
pycuda_available = False pycuda_available = False
try: try:
import scikits.cuda import skcuda
from scikits.cuda import fft from skcuda import fft
scikits_cuda_available = True scikits_cuda_available = True
except (ImportError, Exception): except (ImportError, Exception):
scikits_cuda_available = False scikits_cuda_available = False
...@@ -47,7 +50,7 @@ class CuRFFTOp(Op): ...@@ -47,7 +50,7 @@ class CuRFFTOp(Op):
# The effect of padding on gradients has yet to be investigated. # The effect of padding on gradients has yet to be investigated.
if not scikits_cuda_available: if not scikits_cuda_available:
raise RuntimeError("scikits.cuda is needed for CuFFTOp") raise RuntimeError("skcuda is needed for CuFFTOp")
if not pygpu_available: if not pygpu_available:
raise RuntimeError("pygpu is needed for CuFFTOp") raise RuntimeError("pygpu is needed for CuFFTOp")
...@@ -77,7 +80,7 @@ class CuRFFTOp(Op): ...@@ -77,7 +80,7 @@ class CuRFFTOp(Op):
# Initiliaze cuda context to the input's. # Initiliaze cuda context to the input's.
with node.inputs[0].type.context: with node.inputs[0].type.context:
scikits.cuda.misc.init() skcuda.misc.init()
plan_input_shape = [None] plan_input_shape = [None]
plan = [None] plan = [None]
...@@ -108,7 +111,7 @@ class CuRFFTOp(Op): ...@@ -108,7 +111,7 @@ class CuRFFTOp(Op):
input_pycuda = inputs[0][0] input_pycuda = inputs[0][0]
# I thought we'd need to change the type on output_pycuda # I thought we'd need to change the type on output_pycuda
# so it is complex64, but as it turns out scikits.cuda.fft # so it is complex64, but as it turns out skcuda.fft
# doesn't really care either way and treats the array as # doesn't really care either way and treats the array as
# if it is complex64 anyway. # if it is complex64 anyway.
output_pycuda = z[0] output_pycuda = z[0]
...@@ -172,7 +175,7 @@ class CuIRFFTOp(Op): ...@@ -172,7 +175,7 @@ class CuIRFFTOp(Op):
# The effect of padding on gradients has yet to be investigated. # The effect of padding on gradients has yet to be investigated.
if not scikits_cuda_available: if not scikits_cuda_available:
raise RuntimeError("scikits.cuda is needed for CuIFFTOp") raise RuntimeError("skcuda is needed for CuIFFTOp")
if not pygpu_available: if not pygpu_available:
raise RuntimeError("pygpu is needed for CuIFFTOp") raise RuntimeError("pygpu is needed for CuIFFTOp")
...@@ -202,7 +205,7 @@ class CuIRFFTOp(Op): ...@@ -202,7 +205,7 @@ class CuIRFFTOp(Op):
# Initiliaze cuda context to the input's. # Initiliaze cuda context to the input's.
with node.inputs[0].type.context: with node.inputs[0].type.context:
scikits.cuda.misc.init() skcuda.misc.init()
plan_input_shape = [None] plan_input_shape = [None]
plan = [None] plan = [None]
...@@ -231,7 +234,7 @@ class CuIRFFTOp(Op): ...@@ -231,7 +234,7 @@ class CuIRFFTOp(Op):
input_pycuda = inputs[0][0] input_pycuda = inputs[0][0]
# input_pycuda is a float32 array with an extra dimension, # input_pycuda is a float32 array with an extra dimension,
# but will be interpreted by scikits.cuda as a complex64 # but will be interpreted by skcuda as a complex64
# array instead. # array instead.
output_pycuda = z[0] output_pycuda = z[0]
...@@ -366,3 +369,15 @@ def _unitary(norm): ...@@ -366,3 +369,15 @@ def _unitary(norm):
raise ValueError("Invalid value %s for norm, must be None, 'ortho' or " raise ValueError("Invalid value %s for norm, must be None, 'ortho' or "
"'no norm'" % norm) "'no norm'" % norm)
return norm return norm
if scikits_cuda_available:
@register_opt('fast_compile')
@op_lifter([theano.tensor.fft.RFFTOp])
def local_curfft_op(node, context_name):
return curfft_op
@register_opt('fast_compile')
@op_lifter([theano.tensor.fft.IRFFTOp])
def local_cuirfft_op(node, context_name):
return cuirfft_op
...@@ -9,6 +9,8 @@ from theano.tests import unittest_tools as utt ...@@ -9,6 +9,8 @@ from theano.tests import unittest_tools as utt
import theano.gpuarray.fft import theano.gpuarray.fft
import numpy.fft import numpy.fft
from .config import mode_with_gpu
# Skip tests if pygpu is not available. # Skip tests if pygpu is not available.
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
from theano.gpuarray.fft import pygpu_available, scikits_cuda_available, pycuda_available from theano.gpuarray.fft import pygpu_available, scikits_cuda_available, pycuda_available
...@@ -30,7 +32,7 @@ class TestFFT(unittest.TestCase): ...@@ -30,7 +32,7 @@ class TestFFT(unittest.TestCase):
x = T.matrix('x', dtype='float32') x = T.matrix('x', dtype='float32')
rfft = theano.gpuarray.fft.curfft(x) rfft = theano.gpuarray.fft.curfft(x)
f_rfft = theano.function([x], rfft) f_rfft = theano.function([x], rfft, mode=mode_with_gpu)
res_rfft = f_rfft(inputs_val) res_rfft = f_rfft(inputs_val)
res_rfft_comp = (np.asarray(res_rfft[:, :, 0]) + res_rfft_comp = (np.asarray(res_rfft[:, :, 0]) +
1j * np.asarray(res_rfft[:, :, 1])) 1j * np.asarray(res_rfft[:, :, 1]))
...@@ -41,7 +43,7 @@ class TestFFT(unittest.TestCase): ...@@ -41,7 +43,7 @@ class TestFFT(unittest.TestCase):
m = rfft.type() m = rfft.type()
irfft = theano.gpuarray.fft.cuirfft(m) irfft = theano.gpuarray.fft.cuirfft(m)
f_irfft = theano.function([m], irfft) f_irfft = theano.function([m], irfft, mode=mode_with_gpu)
res_irfft = f_irfft(res_rfft) res_irfft = f_irfft(res_rfft)
utt.assert_allclose(inputs_val, np.asarray(res_irfft)) utt.assert_allclose(inputs_val, np.asarray(res_irfft))
...@@ -65,7 +67,7 @@ class TestFFT(unittest.TestCase): ...@@ -65,7 +67,7 @@ class TestFFT(unittest.TestCase):
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
rfft = theano.gpuarray.fft.curfft(inputs) rfft = theano.gpuarray.fft.curfft(inputs)
f_rfft = theano.function([], rfft) f_rfft = theano.function([], rfft, mode=mode_with_gpu)
res_rfft = f_rfft() res_rfft = f_rfft()
res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) + res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
1j * np.asarray(res_rfft[:, :, :, 1])) 1j * np.asarray(res_rfft[:, :, :, 1]))
...@@ -79,16 +81,28 @@ class TestFFT(unittest.TestCase): ...@@ -79,16 +81,28 @@ class TestFFT(unittest.TestCase):
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
fft = theano.gpuarray.fft.curfft(inputs) fft = theano.gpuarray.fft.curfft(inputs)
f_fft = theano.function([], fft) f_fft = theano.function([], fft, mode=mode_with_gpu)
res_fft = f_fft() res_fft = f_fft()
m = fft.type() m = fft.type()
ifft = theano.gpuarray.fft.cuirfft(m) ifft = theano.gpuarray.fft.cuirfft(m)
f_ifft = theano.function([m], ifft) f_ifft = theano.function([m], ifft, mode=mode_with_gpu)
res_ifft = f_ifft(res_fft) res_ifft = f_ifft(res_fft)
utt.assert_allclose(inputs_val, np.asarray(res_ifft)) utt.assert_allclose(inputs_val, np.asarray(res_ifft))
inputs_val = numpy.random.random((1, N, N, 2)).astype('float32')
inputs = theano.shared(inputs_val)
irfft = theano.gpuarray.fft.cuirfft(inputs)
f_irfft = theano.function([], irfft)
res_irfft = f_irfft()
inputs_ref = inputs_val[..., 0] + inputs_val[..., 1] * 1j
irfft_ref = np.fft.irfftn(inputs_ref, axes=(1, 2))
utt.assert_allclose(irfft_ref, res_irfft, atol=1e-4, rtol=1e-4)
def test_type(self): def test_type(self):
inputs_val = np.random.random((1, N)).astype('float64') inputs_val = np.random.random((1, N)).astype('float64')
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
...@@ -104,7 +118,7 @@ class TestFFT(unittest.TestCase): ...@@ -104,7 +118,7 @@ class TestFFT(unittest.TestCase):
# Unitary normalization # Unitary normalization
rfft = theano.gpuarray.fft.curfft(inputs, norm='ortho') rfft = theano.gpuarray.fft.curfft(inputs, norm='ortho')
f_rfft = theano.function([], rfft) f_rfft = theano.function([], rfft, mode=mode_with_gpu)
res_rfft = f_rfft() res_rfft = f_rfft()
res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) + res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
1j * np.asarray(res_rfft[:, :, :, 1])) 1j * np.asarray(res_rfft[:, :, :, 1]))
...@@ -116,7 +130,7 @@ class TestFFT(unittest.TestCase): ...@@ -116,7 +130,7 @@ class TestFFT(unittest.TestCase):
# No normalization # No normalization
rfft = theano.gpuarray.fft.curfft(inputs, norm='no_norm') rfft = theano.gpuarray.fft.curfft(inputs, norm='no_norm')
f_rfft = theano.function([], rfft) f_rfft = theano.function([], rfft, mode=mode_with_gpu)
res_rfft = f_rfft() res_rfft = f_rfft()
res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) + res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
1j * np.asarray(res_rfft[:, :, :, 1])) 1j * np.asarray(res_rfft[:, :, :, 1]))
...@@ -131,7 +145,7 @@ class TestFFT(unittest.TestCase): ...@@ -131,7 +145,7 @@ class TestFFT(unittest.TestCase):
# Unitary normalization inverse FFT # Unitary normalization inverse FFT
irfft = theano.gpuarray.fft.cuirfft(inputs, norm='ortho') irfft = theano.gpuarray.fft.cuirfft(inputs, norm='ortho')
f_irfft = theano.function([], irfft) f_irfft = theano.function([], irfft, mode=mode_with_gpu)
res_irfft = f_irfft() res_irfft = f_irfft()
irfft_ref_ortho = numpy.fft.irfftn( irfft_ref_ortho = numpy.fft.irfftn(
...@@ -142,7 +156,7 @@ class TestFFT(unittest.TestCase): ...@@ -142,7 +156,7 @@ class TestFFT(unittest.TestCase):
# No normalization inverse FFT # No normalization inverse FFT
irfft = theano.gpuarray.fft.cuirfft(inputs, norm='no_norm') irfft = theano.gpuarray.fft.cuirfft(inputs, norm='no_norm')
f_irfft = theano.function([], irfft) f_irfft = theano.function([], irfft, mode=mode_with_gpu)
res_irfft = f_irfft() res_irfft = f_irfft()
utt.assert_allclose(irfft_ref_ortho * np.sqrt(N * N), utt.assert_allclose(irfft_ref_ortho * np.sqrt(N * N),
...@@ -180,7 +194,7 @@ class TestFFT(unittest.TestCase): ...@@ -180,7 +194,7 @@ class TestFFT(unittest.TestCase):
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
rfft = theano.gpuarray.fft.curfft(inputs) rfft = theano.gpuarray.fft.curfft(inputs)
f_rfft = theano.function([], rfft) f_rfft = theano.function([], rfft, mode=mode_with_gpu)
res_rfft = f_rfft() res_rfft = f_rfft()
res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) + res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
...@@ -192,7 +206,7 @@ class TestFFT(unittest.TestCase): ...@@ -192,7 +206,7 @@ class TestFFT(unittest.TestCase):
m = rfft.type() m = rfft.type()
ifft = theano.gpuarray.fft.cuirfft(m, is_odd=True) ifft = theano.gpuarray.fft.cuirfft(m, is_odd=True)
f_ifft = theano.function([m], ifft) f_ifft = theano.function([m], ifft, mode=mode_with_gpu)
res_ifft = f_ifft(res_rfft) res_ifft = f_ifft(res_rfft)
utt.assert_allclose(inputs_val, np.asarray(res_ifft)) utt.assert_allclose(inputs_val, np.asarray(res_ifft))
...@@ -201,7 +215,7 @@ class TestFFT(unittest.TestCase): ...@@ -201,7 +215,7 @@ class TestFFT(unittest.TestCase):
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
irfft = theano.gpuarray.fft.cuirfft(inputs, norm='ortho', is_odd=True) irfft = theano.gpuarray.fft.cuirfft(inputs, norm='ortho', is_odd=True)
f_irfft = theano.function([], irfft) f_irfft = theano.function([], irfft, mode=mode_with_gpu)
res_irfft = f_irfft() res_irfft = f_irfft()
inputs_ref = inputs_val[:, :, :, 0] + 1j * inputs_val[:, :, :, 1] inputs_ref = inputs_val[:, :, :, 0] + 1j * inputs_val[:, :, :, 1]
...@@ -246,3 +260,4 @@ class TestFFT(unittest.TestCase): ...@@ -246,3 +260,4 @@ class TestFFT(unittest.TestCase):
theano.gpuarray.fft.cuirfft(inputs, norm=123) theano.gpuarray.fft.cuirfft(inputs, norm=123)
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
theano.gpuarray.fft.cuirfft(inputs, is_odd=123) theano.gpuarray.fft.cuirfft(inputs, is_odd=123)
...@@ -12,6 +12,13 @@ from six.moves import xrange ...@@ -12,6 +12,13 @@ from six.moves import xrange
from theano import tensor from theano import tensor
from theano.gof import Op, Apply, generic from theano.gof import Op, Apply, generic
# This module will soon be deprecated.
import warnings
message = ("The module theano.sandbox.fourier will soon be deprecated."
" Please use theano.tensor.fft, which supports gradients and "
"automatic optimization transfers to the GPU ops.")
warnings.warn(message)
class GradTodo(Op): class GradTodo(Op):
# TODO : need description for class # TODO : need description for class
......
from __future__ import absolute_import, print_function, division
import numpy as np
from theano import gof
import theano.tensor as T
from theano.gradient import DisconnectedType
class RFFTOp(gof.Op):
__props__ = ()
def output_type(self, inp):
# add extra dim for real/imag
return T.TensorType(inp.dtype,
broadcastable=[False] * (inp.type.ndim + 1))
def make_node(self, a, s=None):
a = T.as_tensor_variable(a)
if a.ndim < 2:
raise TypeError('%s: input must have dimension > 2, with first dimension batches' %
self.__class__.__name__)
if s is None:
s = a.shape[1:]
s = T.as_tensor_variable(s)
else:
s = T.as_tensor_variable(s)
if (not s.dtype.startswith('int')) and \
(not s.dtype.startswith('uint')):
raise TypeError('%s: length of the transformed axis must be'
' of type integer' % self.__class__.__name__)
return gof.Apply(self, [a, s], [self.output_type(a)()])
def perform(self, node, inputs, output_storage):
a = inputs[0]
s = inputs[1]
A = np.fft.rfftn(a, s=tuple(s))
# Format output with two extra dimensions for real and imaginary
# parts.
out = np.zeros(A.shape + (2,), dtype=a.dtype)
out[..., 0], out[..., 1] = np.real(A), np.imag(A)
output_storage[0][0] = out
def grad(self, inputs, output_grads):
gout, = output_grads
s = inputs[1]
# Divide the last dimension of the output gradients by 2, they are
# double-counted by the real-IFFT due to symmetry, except the first
# and last elements (for even transforms) which are unique.
idx = [slice(None)] * (gout.ndim - 2) \
+ [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(None)]
gout = T.set_subtensor(gout[idx], gout[idx] * 0.5)
return [irfft_op(gout, s), DisconnectedType()()]
def connection_pattern(self, node):
# Specificy that shape input parameter has no connection to graph and gradients.
return [[True], [False]]
rfft_op = RFFTOp()
class IRFFTOp(gof.Op):
__props__ = ()
def output_type(self, inp):
# remove extra dim for real/imag
return T.TensorType(inp.dtype,
broadcastable=[False] * (inp.type.ndim - 1))
def make_node(self, a, s=None):
a = T.as_tensor_variable(a)
if a.ndim < 3:
raise TypeError('%s: input must have dimension >= 3, with ' %
self.__class__.__name__ +
'first dimension batches and last real/imag parts')
if s is None:
s = a.shape[1:-1]
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2)
s = T.as_tensor_variable(s)
else:
s = T.as_tensor_variable(s)
if (not s.dtype.startswith('int')) and \
(not s.dtype.startswith('uint')):
raise TypeError('%s: length of the transformed axis must be'
' of type integer' % self.__class__.__name__)
return gof.Apply(self, [a, s], [self.output_type(a)()])
def perform(self, node, inputs, output_storage):
a = inputs[0]
s = inputs[1]
# Reconstruct complex array from two float dimensions
inp = a[..., 0] + 1j * a[..., 1]
out = np.fft.irfftn(inp, s=tuple(s))
# Remove numpy's default normalization
# Cast to input type (numpy outputs float64 by default)
output_storage[0][0] = (out * s.prod()).astype(a.dtype)
def grad(self, inputs, output_grads):
gout, = output_grads
s = inputs[1]
gf = rfft_op(gout, s)
# Multiply the last dimension of the gradient by 2, they represent
# both positive and negative frequencies, except the first
# and last elements (for even transforms) which are unique.
idx = [slice(None)] * (gf.ndim - 2) \
+ [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(None)]
gf = T.set_subtensor(gf[idx], gf[idx] * 2)
return [gf, DisconnectedType()()]
def connection_pattern(self, node):
# Specificy that shape input parameter has no connection to graph and gradients.
return [[True], [False]]
irfft_op = IRFFTOp()
def rfft(inp, norm=None):
"""
Performs the fast Fourier transform of a real-valued input.
The input must be a real-valued variable of dimensions (m, ..., n).
It performs FFTs of size (..., n) on m batches.
The output is a tensor of dimensions (m, ..., n//2+1, 2). The second to
last dimension of the output contains the n//2+1 non-trivial elements of
the real-valued FFTs. The real and imaginary parts are stored as a pair of
float arrays.
Parameters
----------
inp
Array of floats of size (m, ..., n), containing m inputs of
size (..., n).
norm : {None, 'ortho', 'no_norm'}
Normalization of transform. Following numpy, default *None* normalizes
only the inverse transform by n, 'ortho' yields the unitary transform
(:math:`1/\sqrt n` forward and inverse). In addition, 'no_norm' leaves
the transform unnormalized.
"""
s = inp.shape[1:]
cond_norm = _unitary(norm)
scaling = 1
if cond_norm == "ortho":
scaling = T.sqrt(s.prod().astype(inp.dtype))
return rfft_op(inp, s) / scaling
def irfft(inp, norm=None, is_odd=False):
"""
Performs the inverse fast Fourier Transform with real-valued output.
The input is a variable of dimensions (m, ..., n//2+1, 2)
representing the non-trivial elements of m real-valued Fourier transforms
of initial size (..., n). The real and imaginary parts are stored as a
pair of float arrays.
The output is a real-valued variable of dimensions (m, ..., n)
giving the m inverse FFTs.
Parameters
----------
inp
Array of size (m, ..., n//2+1, 2), containing m inputs
with n//2+1 non-trivial elements on the last dimension and real
and imaginary parts stored as separate real arrays.
norm : {None, 'ortho', 'no_norm'}
Normalization of transform. Following numpy, default *None* normalizes
only the inverse transform by n, 'ortho' yields the unitary transform
(:math:`1/\sqrt n` forward and inverse). In addition, 'no_norm' leaves
the transform unnormalized.
is_odd : {True, False}
Set to True to get a real inverse transform output with an odd last dimension
of length (N-1)*2 + 1 for an input last dimension of length N.
"""
if is_odd not in (True, False):
raise ValueError("Invalid value %s for id_odd, must be True or False" % is_odd)
s = inp.shape[1:-1]
if is_odd:
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2 + 1)
else:
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2)
cond_norm = _unitary(norm)
scaling = 1
# Numpy's default normalization is 1/N on the inverse transform.
if cond_norm is None:
scaling = s.prod().astype(inp.dtype)
elif cond_norm == "ortho":
scaling = T.sqrt(s.prod().astype(inp.dtype))
return irfft_op(inp, s) / scaling
def _unitary(norm):
if norm not in (None, "ortho", "no_norm"):
raise ValueError("Invalid value %s for norm, must be None, 'ortho' or "
"'no norm'" % norm)
return norm
...@@ -6,6 +6,10 @@ from theano import gof, tensor ...@@ -6,6 +6,10 @@ from theano import gof, tensor
class Fourier(gof.Op): class Fourier(gof.Op):
""" """
WARNING: for officially supported FFTs, use theano.tensor.fft, which
provides real-input FFTs. Gradients are supported, as well as optimization
transfers to GPU ops.
An instance of this class returns a finite fourier transform calcutated An instance of this class returns a finite fourier transform calcutated
along one dimension of an input array. along one dimension of an input array.
...@@ -141,3 +145,4 @@ class Fourier(gof.Op): ...@@ -141,3 +145,4 @@ class Fourier(gof.Op):
fft = Fourier() fft = Fourier()
from __future__ import absolute_import, print_function, division
import numpy
import unittest
import theano
from theano import tensor as T
from theano.tests import unittest_tools as utt
from theano.tensor import fft
N = 16
class TestFFT(unittest.TestCase):
def test_rfft_float(self):
# Test that numpy's default float64 output is cast to theano input type
eps = 1e-1
def f_rfft(inp):
return fft.rfft(inp)
inputs_val = numpy.random.random((1, N)).astype('float32')
utt.verify_grad(f_rfft, [inputs_val], eps=eps)
def f_irfft(inp):
return fft.irfft(inp)
inputs_val = numpy.random.random((1, N // 2 + 1, 2)).astype('float32')
utt.verify_grad(f_irfft, [inputs_val], eps=eps)
def test_1Drfft(self):
inputs_val = numpy.random.random((1, N))
x = T.matrix('x')
rfft = fft.rfft(x)
f_rfft = theano.function([x], rfft)
res_rfft = f_rfft(inputs_val)
res_rfft_comp = (numpy.asarray(res_rfft[:, :, 0]) +
1j * numpy.asarray(res_rfft[:, :, 1]))
rfft_ref = numpy.fft.rfft(inputs_val, axis=1)
utt.assert_allclose(rfft_ref, res_rfft_comp)
m = rfft.type()
print(m.ndim)
irfft = fft.irfft(m)
f_irfft = theano.function([m], irfft)
res_irfft = f_irfft(res_rfft)
utt.assert_allclose(inputs_val, numpy.asarray(res_irfft))
# The numerical gradient of the FFT is sensitive, must set large
# enough epsilon to get good accuracy.
eps = 1e-1
def f_rfft(inp):
return fft.rfft(inp)
inputs_val = numpy.random.random((1, N))
utt.verify_grad(f_rfft, [inputs_val], eps=eps)
def f_irfft(inp):
return fft.irfft(inp)
inputs_val = numpy.random.random((1, N // 2 + 1, 2))
utt.verify_grad(f_irfft, [inputs_val], eps=eps)
def test_rfft(self):
inputs_val = numpy.random.random((1, N, N))
inputs = theano.shared(inputs_val)
rfft = fft.rfft(inputs)
f_rfft = theano.function([], rfft)
res_rfft = f_rfft()
res_rfft_comp = (numpy.asarray(res_rfft[:, :, :, 0]) +
1j * numpy.asarray(res_rfft[:, :, :, 1]))
rfft_ref = numpy.fft.rfftn(inputs_val, axes=(1, 2))
utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4)
def test_irfft(self):
inputs_val = numpy.random.random((1, N, N))
inputs = theano.shared(inputs_val)
rfft = fft.rfft(inputs)
f_rfft = theano.function([], rfft)
res_fft = f_rfft()
m = rfft.type()
irfft = fft.irfft(m)
f_irfft = theano.function([m], irfft)
res_irfft = f_irfft(res_fft)
utt.assert_allclose(inputs_val, numpy.asarray(res_irfft))
inputs_val = numpy.random.random((1, N, N, 2))
inputs = theano.shared(inputs_val)
irfft = fft.irfft(inputs)
f_irfft = theano.function([], irfft)
res_irfft = f_irfft()
inputs_ref = inputs_val[..., 0] + inputs_val[..., 1] * 1j
irfft_ref = numpy.fft.irfftn(inputs_ref, axes=(1, 2))
utt.assert_allclose(irfft_ref, res_irfft, atol=1e-4, rtol=1e-4)
def test_norm_rfft(self):
inputs_val = numpy.random.random((1, N, N))
inputs = theano.shared(inputs_val)
# Unitary normalization
rfft = fft.rfft(inputs, norm='ortho')
f_rfft = theano.function([], rfft)
res_rfft = f_rfft()
res_rfft_comp = (numpy.asarray(res_rfft[:, :, :, 0]) +
1j * numpy.asarray(res_rfft[:, :, :, 1]))
rfft_ref_ortho = numpy.fft.rfftn(inputs_val, axes=(1, 2), norm='ortho')
utt.assert_allclose(rfft_ref_ortho, res_rfft_comp,
atol=1e-4, rtol=1e-4)
# No normalization
rfft = fft.rfft(inputs, norm='no_norm')
f_rfft = theano.function([], rfft)
res_rfft = f_rfft()
res_rfft_comp = (numpy.asarray(res_rfft[:, :, :, 0]) +
1j * numpy.asarray(res_rfft[:, :, :, 1]))
utt.assert_allclose(rfft_ref_ortho * numpy.sqrt(N * N),
res_rfft_comp, atol=1e-4, rtol=1e-4)
# Inverse FFT inputs
inputs_val = numpy.random.random((1, N, N // 2 + 1, 2))
inputs = theano.shared(inputs_val)
inputs_ref = inputs_val[..., 0] + 1j * inputs_val[..., 1]
# Unitary normalization inverse FFT
irfft = fft.irfft(inputs, norm='ortho')
f_irfft = theano.function([], irfft)
res_irfft = f_irfft()
irfft_ref_ortho = numpy.fft.irfftn(inputs_ref, axes=(1, 2), norm='ortho')
utt.assert_allclose(irfft_ref_ortho,
res_irfft, atol=1e-4, rtol=1e-4)
# No normalization inverse FFT
irfft = fft.irfft(inputs, norm='no_norm')
f_irfft = theano.function([], irfft)
res_irfft = f_irfft()
utt.assert_allclose(irfft_ref_ortho * numpy.sqrt(N * N),
res_irfft, atol=1e-4, rtol=1e-4)
def test_params(self):
inputs_val = numpy.random.random((1, N))
inputs = theano.shared(inputs_val)
with self.assertRaises(ValueError):
fft.rfft(inputs, norm=123)
inputs_val = numpy.random.random((1, N // 2 + 1, 2))
inputs = theano.shared(inputs_val)
with self.assertRaises(ValueError):
fft.irfft(inputs, norm=123)
with self.assertRaises(ValueError):
fft.irfft(inputs, is_odd=123)
def test_grad_rfft(self):
# The numerical gradient of the FFT is sensitive, must set large
# enough epsilon to get good accuracy.
eps = 1e-1
def f_rfft(inp):
return fft.rfft(inp)
inputs_val = numpy.random.random((1, N, N)).astype('float32')
utt.verify_grad(f_rfft, [inputs_val], eps=eps)
def f_irfft(inp):
return fft.irfft(inp)
inputs_val = numpy.random.random((1, N, N // 2 + 1, 2)).astype('float32')
utt.verify_grad(f_irfft, [inputs_val], eps=eps)
def f_rfft(inp):
return fft.rfft(inp, norm='ortho')
inputs_val = numpy.random.random((1, N, N)).astype('float32')
utt.verify_grad(f_rfft, [inputs_val], eps=eps)
def f_irfft(inp):
return fft.irfft(inp, norm='no_norm')
inputs_val = numpy.random.random((1, N, N // 2 + 1, 2)).astype('float32')
utt.verify_grad(f_irfft, [inputs_val], eps=eps)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论