提交 0a889321 authored 作者: slefrancois's avatar slefrancois

final fft ops take shape as input, interface passes input array shape + odd correction

上级 29271d0a
...@@ -38,7 +38,14 @@ class CuRFFTOp(Op): ...@@ -38,7 +38,14 @@ class CuRFFTOp(Op):
broadcastable=[False] * (inp.type.ndim + 1), broadcastable=[False] * (inp.type.ndim + 1),
context_name=inp.type.context_name) context_name=inp.type.context_name)
def make_node(self, inp): def make_node(self, inp, s=None):
# A shape parameter s can be provided as an input. For now this is used to
# manage odd transform sizes.
# Later this could be extended to handle padding and trunkation,
# following numpy's interface. However, cuFFT expects array that match
# the shape given to the plan, so padding will have to be done in the op.
# The effect of padding on gradients has yet to be investigated.
if not scikits_cuda_available: if not scikits_cuda_available:
raise RuntimeError("scikits.cuda is needed for CuFFTOp") raise RuntimeError("scikits.cuda is needed for CuFFTOp")
...@@ -52,9 +59,16 @@ class CuRFFTOp(Op): ...@@ -52,9 +59,16 @@ class CuRFFTOp(Op):
basic_ops.as_gpuarray_variable(inp, basic_ops.as_gpuarray_variable(inp,
basic_ops.infer_context_name(inp))) basic_ops.infer_context_name(inp)))
# If no shape is provided as input, default to input data shape.
if s is None:
s = inp.shape[1:]
s = T.as_tensor_variable(s)
assert inp.dtype == "float32" assert inp.dtype == "float32"
assert s.ndim == 1
assert 'int' in s.dtype
return theano.Apply(self, [inp], [self.output_type(inp)()]) return theano.Apply(self, [inp, s], [self.output_type(inp)()])
def make_thunk(self, node, storage_map, _, _2): def make_thunk(self, node, storage_map, _, _2):
...@@ -70,9 +84,13 @@ class CuRFFTOp(Op): ...@@ -70,9 +84,13 @@ class CuRFFTOp(Op):
def thunk(): def thunk():
input_shape = inputs[0][0].shape input_shape = inputs[0][0].shape
s = inputs[1][0]
# Since padding is not supported, assert s matches input shape.
assert (input_shape[1:] == s).all()
# construct output shape # construct output shape
output_shape = list(input_shape) output_shape = [input_shape[0]] + list(s)
# DFT of real input is symmetric, no need to store # DFT of real input is symmetric, no need to store
# redundant coefficients # redundant coefficients
output_shape[-1] = output_shape[-1] // 2 + 1 output_shape[-1] = output_shape[-1] // 2 + 1
...@@ -99,13 +117,15 @@ class CuRFFTOp(Op): ...@@ -99,13 +117,15 @@ class CuRFFTOp(Op):
# only initialise plan if necessary # only initialise plan if necessary
if plan[0] is None or plan_input_shape[0] != input_shape: if plan[0] is None or plan_input_shape[0] != input_shape:
plan_input_shape[0] = input_shape plan_input_shape[0] = input_shape
plan[0] = fft.Plan(input_shape[1:], np.float32, np.complex64, plan[0] = fft.Plan(s, np.float32, np.complex64,
batch=input_shape[0]) batch=input_shape[0])
# Sync GPU variables before computation # Sync GPU variables before computation
input_pycuda.sync() input_pycuda.sync()
output_pycuda.sync() output_pycuda.sync()
fft.fft(input_pycuda, output_pycuda, plan[0]) fft.fft(input_pycuda, output_pycuda, plan[0])
# Sync results to ensure output contains completed computation # Sync results to ensure output contains completed computation
pycuda.driver.Context.synchronize() pycuda.driver.Context.synchronize()
...@@ -117,15 +137,18 @@ class CuRFFTOp(Op): ...@@ -117,15 +137,18 @@ class CuRFFTOp(Op):
def grad(self, inputs, output_grads): def grad(self, inputs, output_grads):
gout, = output_grads gout, = output_grads
s = inputs[0].shape[1:] s = inputs[1]
is_odd = s[-1] % 2 # Divide the last dimension of the output gradients by 2, they are
# Divide the last dimension of the output gradients by 2, they are
# double-counted by the real-IFFT due to symmetry, except the first # double-counted by the real-IFFT due to symmetry, except the first
# and last elements (for even transforms) which are unique. # and last elements (for even transforms) which are unique.
idx = [slice(None)] * (gout.ndim - 2) \ idx = [slice(None)] * (gout.ndim - 2) \
+ [slice(1, (s[-1] // 2) + is_odd)] + [slice(None)] + [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(None)]
gout = T.set_subtensor(gout[idx], gout[idx]*0.5) gout = T.set_subtensor(gout[idx], gout[idx] * 0.5)
return [cuirfft_op(gout, is_odd)] return [cuirfft_op(gout, s), DisconnectedType()()]
def connection_pattern(self, node):
# Specificy that shape input parameter has no connection to graph and gradients.
return [[True], [False]]
curfft_op = CuRFFTOp() curfft_op = CuRFFTOp()
...@@ -135,12 +158,19 @@ class CuIRFFTOp(Op): ...@@ -135,12 +158,19 @@ class CuIRFFTOp(Op):
__props__ = () __props__ = ()
def output_type(self, inp): def output_type(self, inp):
# add one extra dim for real/imag # remove extra dim for real/imag
return GpuArrayType(inp.dtype, return GpuArrayType(inp.dtype,
broadcastable=[False] * (inp.type.ndim - 1), broadcastable=[False] * (inp.type.ndim - 1),
context_name=inp.type.context_name) context_name=inp.type.context_name)
def make_node(self, inp, is_odd): def make_node(self, inp, s=None):
# A shape parameter is expected as an input. For now this is used to
# manage odd transform sizes.
# Later this could be extended to handle padding and trunkation,
# following numpy's interface. However, cuFFT expects array that match
# the shape given to the plan, so padding will have to be done in the op.
# The effect of padding on gradients has yet to be investigated.
if not scikits_cuda_available: if not scikits_cuda_available:
raise RuntimeError("scikits.cuda is needed for CuIFFTOp") raise RuntimeError("scikits.cuda is needed for CuIFFTOp")
...@@ -153,12 +183,17 @@ class CuIRFFTOp(Op): ...@@ -153,12 +183,17 @@ class CuIRFFTOp(Op):
inp = basic_ops.gpu_contiguous( inp = basic_ops.gpu_contiguous(
basic_ops.as_gpuarray_variable(inp, basic_ops.as_gpuarray_variable(inp,
basic_ops.infer_context_name(inp))) basic_ops.infer_context_name(inp)))
is_odd = T.as_tensor_variable(is_odd)
# If no shape is provided as input, calculate shape assuming even real transform.
if s is None:
s = inp.shape[1:-1]
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2)
s = T.as_tensor_variable(s)
assert inp.dtype == "float32" assert inp.dtype == "float32"
assert 'int' in is_odd.dtype assert s.ndim == 1
return theano.Apply(self, [inp, is_odd], [self.output_type(inp)()]) return theano.Apply(self, [inp, s], [self.output_type(inp)()])
def make_thunk(self, node, storage_map, _, _2): def make_thunk(self, node, storage_map, _, _2):
...@@ -174,16 +209,18 @@ class CuIRFFTOp(Op): ...@@ -174,16 +209,18 @@ class CuIRFFTOp(Op):
def thunk(): def thunk():
input_shape = inputs[0][0].shape input_shape = inputs[0][0].shape
is_odd = inputs[1][0] s = inputs[1][0]
assert is_odd in (0, 1)
# Since padding is not supported, assert that last dimension corresponds to
# input forward transform size.
assert (input_shape[1:-2] == s[:-1]).all()
assert ((input_shape[-2] - 1) * 2 + s[-1] % 2 == s[-1]).all()
# construct output shape # construct output shape
# chop off the extra length-2 dimension for real/imag # chop off the extra length-2 dimension for real/imag
output_shape = list(input_shape[:-1]) output_shape = [input_shape[0]] + list(s)
# restore full signal length
output_shape[-1] = (output_shape[-1] - 1) * 2 + is_odd
output_shape = tuple(output_shape) output_shape = tuple(output_shape)
z = outputs[0] z = outputs[0]
# only allocate if there is no previous allocation of the # only allocate if there is no previous allocation of the
...@@ -202,8 +239,9 @@ class CuIRFFTOp(Op): ...@@ -202,8 +239,9 @@ class CuIRFFTOp(Op):
# only initialise plan if necessary # only initialise plan if necessary
if plan[0] is None or plan_input_shape[0] != input_shape: if plan[0] is None or plan_input_shape[0] != input_shape:
plan_input_shape[0] = input_shape plan_input_shape[0] = input_shape
plan[0] = fft.Plan(output_shape[1:], np.complex64, np.float32, plan[0] = fft.Plan(s, np.complex64, np.float32,
batch=output_shape[0]) batch=output_shape[0])
# Sync GPU variables before computation # Sync GPU variables before computation
input_pycuda.sync() input_pycuda.sync()
output_pycuda.sync() output_pycuda.sync()
...@@ -221,33 +259,35 @@ class CuIRFFTOp(Op): ...@@ -221,33 +259,35 @@ class CuIRFFTOp(Op):
thunk.lazy = False thunk.lazy = False
return thunk return thunk
def grad(self, inputs, output_grads): def grad(self, inputs, output_grads):
gout, = output_grads gout, = output_grads
s = gout.shape s = inputs[1]
gf = curfft_op(gout) gf = curfft_op(gout, s)
# Multiply the last dimension of the gradient by 2, they represent # Multiply the last dimension of the gradient by 2, they represent
# both positive and negative frequencies, except the first # both positive and negative frequencies, except the first
# and last elements (for even transforms) which are unique. # and last elements (for even transforms) which are unique.
idx = [slice(None)] * (gf.ndim - 2) \ idx = [slice(None)] * (gf.ndim - 2) \
+ [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(None)] + [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(None)]
gf = T.set_subtensor(gf[idx], gf[idx]*2) gf = T.set_subtensor(gf[idx], gf[idx] * 2)
return [gf, DisconnectedType()()] return [gf, DisconnectedType()()]
def connection_pattern(self, node): def connection_pattern(self, node):
return [[True],[False]] # Specificy that shape input parameter has no connection to graph and gradients.
return [[True], [False]]
cuirfft_op = CuIRFFTOp() cuirfft_op = CuIRFFTOp()
def curfft(inp, norm=None): def curfft(inp, norm=None):
""" """
Performs the fast Fourier transform of a real-valued output on the GPU Performs the fast Fourier transform of a real-valued output on the GPU
through the gpuarray backend. through the gpuarray backend.
The input must be a real-valued float32 variable of dimensions (m, ..., n). The input must be a real-valued float32 variable of dimensions (m, ..., n).
It performs FFTs of size (..., n) on m batches. It performs FFTs of size (..., n) on m batches.
The output is a GpuArray of dimensions (m, ..., n//2+1, 2). The second to The output is a GpuArray of dimensions (m, ..., n//2+1, 2). The second to
last dimension of the output contains the n//2+1 non-trivial elements of last dimension of the output contains the n//2+1 non-trivial elements of
the real-valued FFTs. The real and imaginary parts are stored as two the real-valued FFTs. The real and imaginary parts are stored as two
float32 arrays, emulating complex64. Since theano does not support complex float32 arrays, emulating complex64. Since theano does not support complex
...@@ -269,14 +309,14 @@ def curfft(inp, norm=None): ...@@ -269,14 +309,14 @@ def curfft(inp, norm=None):
s = inp.shape[1:] s = inp.shape[1:]
cond_norm = _unitary(norm) cond_norm = _unitary(norm)
if cond_norm is None or cond_norm == "no_norm": scaling = 1
scaling = 1 if cond_norm == "ortho":
elif cond_norm == "ortho":
scaling = T.sqrt(s.prod().astype('float32')) scaling = T.sqrt(s.prod().astype('float32'))
return curfft_op(inp) / scaling
def cuirfft(inp, norm=None, is_odd=0): return curfft_op(inp, s) / scaling
def cuirfft(inp, norm=None, is_odd=False):
""" """
Performs the real-valued output inverse Fourier Transform using the Performs the real-valued output inverse Fourier Transform using the
gpuarray backend. gpuarray backend.
...@@ -288,37 +328,42 @@ def cuirfft(inp, norm=None, is_odd=0): ...@@ -288,37 +328,42 @@ def cuirfft(inp, norm=None, is_odd=0):
given that Theano does not support complex numbers. given that Theano does not support complex numbers.
The output is a real-valued float32 variable of dimensions (m, ..., n) The output is a real-valued float32 variable of dimensions (m, ..., n)
giving the m inverse FFTs. giving the m inverse FFTs.
Parameters Parameters
---------- ----------
inp inp
Array of float32 of size (m, ..., n//2+1, 2), containing m inputs Array of float32 of size (m, ..., n//2+1, 2), containing m inputs
with n/2+1 non-trivial elements on the last dimension and real with n//2+1 non-trivial elements on the last dimension and real
and imaginary parts stored as separate arrays. and imaginary parts stored as separate arrays.
norm : {None, 'ortho', 'no_norm'} norm : {None, 'ortho', 'no_norm'}
Normalization of transform. Following numpy, default *None* normalizes Normalization of transform. Following numpy, default *None* normalizes
only the inverse transform by n, 'ortho' yields the unitary transform only the inverse transform by n, 'ortho' yields the unitary transform
(:math:`1/\sqrt n` forward and inverse). In addition, 'no_norm' leaves (:math:`1/\sqrt n` forward and inverse). In addition, 'no_norm' leaves
the transform unnormalized. the transform unnormalized.
is_odd : {True, False}
Set to True to get a real inverse transform output with an odd last dimension
of length (N-1)*2 + 1 for an input last dimension of length N.
""" """
if is_odd != 0: if is_odd not in (True, False):
is_odd = 1 raise ValueError("Invalid value %s for id_odd, must be True or False" % is_odd)
s = inp.shape[1:-1] s = inp.shape[1:-1]
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2 + is_odd) if is_odd:
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2 + 1)
else:
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2)
cond_norm = _unitary(norm) cond_norm = _unitary(norm)
scaling = 1
if cond_norm is None: if cond_norm is None:
scaling = s.prod().astype('float32') scaling = s.prod().astype('float32')
if cond_norm == "ortho": elif cond_norm == "ortho":
scaling = T.sqrt(s.prod().astype('float32')) scaling = T.sqrt(s.prod().astype('float32'))
if cond_norm == "no_norm":
scaling = 1
return cuirfft_op(inp, is_odd) / scaling return cuirfft_op(inp, s) / scaling
def _unitary(norm): def _unitary(norm):
if norm not in (None, "ortho", "no_norm"): if norm not in (None, "ortho", "no_norm"):
......
...@@ -25,14 +25,13 @@ if not pycuda_available: # noqa ...@@ -25,14 +25,13 @@ if not pycuda_available: # noqa
import theano.gpuarray.cuda_fft import theano.gpuarray.cuda_fft
# Transform sizes # Transform sizes
N = 16 N = 64
class TestFFT(unittest.TestCase): class TestFFT(unittest.TestCase):
def test_1Dfft(self): def test_1Dfft(self):
inputs_val = np.random.random((1, N)).astype('float32') inputs_val = np.random.random((1, N)).astype('float32')
inputs = theano.shared(inputs_val)
x = T.matrix('x', dtype='float32') x = T.matrix('x', dtype='float32')
rfft = theano.gpuarray.fft.curfft(x) rfft = theano.gpuarray.fft.curfft(x)
...@@ -40,200 +39,215 @@ class TestFFT(unittest.TestCase): ...@@ -40,200 +39,215 @@ class TestFFT(unittest.TestCase):
res_rfft = f_rfft(inputs_val) res_rfft = f_rfft(inputs_val)
res_rfft_comp = (np.asarray(res_rfft[:, :, 0]) + res_rfft_comp = (np.asarray(res_rfft[:, :, 0]) +
1j * np.asarray(res_rfft[:, :, 1])) 1j * np.asarray(res_rfft[:, :, 1]))
rfft_ref = numpy.fft.rfft(inputs_val, axis=1) rfft_ref = numpy.fft.rfft(inputs_val, axis=1)
utt.assert_allclose(rfft_ref, res_rfft_comp) utt.assert_allclose(rfft_ref, res_rfft_comp)
m = rfft.type() m = rfft.type()
irfft = theano.gpuarray.fft.cuirfft(m) irfft = theano.gpuarray.fft.cuirfft(m)
f_irfft = theano.function([m], irfft, mode=mode_with_gpu) f_irfft = theano.function([m], irfft, mode=mode_with_gpu)
res_irfft = f_irfft(res_rfft) res_irfft = f_irfft(res_rfft)
utt.assert_allclose(inputs_val, np.asarray(res_irfft)) utt.assert_allclose(inputs_val, np.asarray(res_irfft))
# The numerical gradient of the FFT is sensitive, must set large # The numerical gradient of the FFT is sensitive, must set large
# enough epsilon to get good accuracy. # enough epsilon to get good accuracy.
eps = 1e-1 eps = 1e-1
def f_rfft(inp): def f_rfft(inp):
return theano.gpuarray.fft.curfft(inp) return theano.gpuarray.fft.curfft(inp)
inputs_val = np.random.random((1, N)).astype('float32') inputs_val = np.random.random((1, N)).astype('float32')
utt.verify_grad(f_rfft, [inputs_val], eps=eps) utt.verify_grad(f_rfft, [inputs_val], eps=eps)
def f_irfft(inp): def f_irfft(inp):
return theano.gpuarray.fft.cuirfft(inp) return theano.gpuarray.fft.cuirfft(inp)
inputs_val = np.random.random((1, N//2+1, 2)).astype('float32') inputs_val = np.random.random((1, N // 2 + 1, 2)).astype('float32')
utt.verify_grad(f_irfft, [inputs_val], eps=eps) utt.verify_grad(f_irfft, [inputs_val], eps=eps)
def test_rfft(self): def test_rfft(self):
inputs_val = np.random.random((1, N, N)).astype('float32') inputs_val = np.random.random((1, N, N)).astype('float32')
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
rfft = theano.gpuarray.fft.curfft(inputs) rfft = theano.gpuarray.fft.curfft(inputs)
f_rfft = theano.function([], rfft, mode=mode_with_gpu) f_rfft = theano.function([], rfft, mode=mode_with_gpu)
res_rfft = f_rfft() res_rfft = f_rfft()
res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) + res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
1j * np.asarray(res_rfft[:, :, :, 1])) 1j * np.asarray(res_rfft[:, :, :, 1]))
rfft_ref = numpy.fft.rfftn(inputs_val, axes=(1,2)) rfft_ref = numpy.fft.rfftn(inputs_val, axes=(1, 2))
utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4) utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4)
def test_irfft(self): def test_irfft(self):
inputs_val = np.random.random((1, N, N)).astype('float32') inputs_val = np.random.random((1, N, N)).astype('float32')
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
fft = theano.gpuarray.fft.curfft(inputs) fft = theano.gpuarray.fft.curfft(inputs)
f_fft = theano.function([], fft, mode=mode_with_gpu) f_fft = theano.function([], fft, mode=mode_with_gpu)
res_fft = f_fft() res_fft = f_fft()
m = fft.type() m = fft.type()
ifft = theano.gpuarray.fft.cuirfft(m) ifft = theano.gpuarray.fft.cuirfft(m)
f_ifft = theano.function([m], ifft, mode=mode_with_gpu) f_ifft = theano.function([m], ifft, mode=mode_with_gpu)
res_ifft = f_ifft(res_fft) res_ifft = f_ifft(res_fft)
utt.assert_allclose(inputs_val, np.asarray(res_ifft)) utt.assert_allclose(inputs_val, np.asarray(res_ifft))
def test_type(self): def test_type(self):
inputs_val = np.random.random((1, N)).astype('float64') inputs_val = np.random.random((1, N)).astype('float64')
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
with self.assertRaises(AssertionError): with self.assertRaises(AssertionError):
theano.gpuarray.fft.curfft(inputs) theano.gpuarray.fft.curfft(inputs)
with self.assertRaises(AssertionError): with self.assertRaises(AssertionError):
theano.gpuarray.fft.cuirfft(inputs) theano.gpuarray.fft.cuirfft(inputs)
def test_norm(self): def test_norm(self):
inputs_val = np.random.random((1, N, N)).astype('float32') inputs_val = np.random.random((1, N, N)).astype('float32')
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
# Unitary normalization # Unitary normalization
rfft = theano.gpuarray.fft.curfft(inputs, norm='ortho') rfft = theano.gpuarray.fft.curfft(inputs, norm='ortho')
f_rfft = theano.function([], rfft, mode=mode_with_gpu) f_rfft = theano.function([], rfft, mode=mode_with_gpu)
res_rfft = f_rfft() res_rfft = f_rfft()
res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) + res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
1j * np.asarray(res_rfft[:, :, :, 1])) 1j * np.asarray(res_rfft[:, :, :, 1]))
rfft_ref_ortho = numpy.fft.rfftn(inputs_val, axes=(1,2), norm='ortho') rfft_ref_ortho = numpy.fft.rfftn(inputs_val, axes=(1, 2), norm='ortho')
utt.assert_allclose(rfft_ref_ortho, res_rfft_comp, utt.assert_allclose(rfft_ref_ortho, res_rfft_comp,
atol=1e-4, rtol=1e-4) atol=1e-4, rtol=1e-4)
# No normalization # No normalization
rfft = theano.gpuarray.fft.curfft(inputs, norm='no_norm') rfft = theano.gpuarray.fft.curfft(inputs, norm='no_norm')
f_rfft = theano.function([], rfft, mode=mode_with_gpu) f_rfft = theano.function([], rfft, mode=mode_with_gpu)
res_rfft = f_rfft() res_rfft = f_rfft()
res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) + res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
1j * np.asarray(res_rfft[:, :, :, 1])) 1j * np.asarray(res_rfft[:, :, :, 1]))
utt.assert_allclose(rfft_ref_ortho * np.sqrt(N*N), utt.assert_allclose(rfft_ref_ortho * np.sqrt(N * N),
res_rfft_comp, atol=1e-4, rtol=1e-4) res_rfft_comp, atol=1e-4, rtol=1e-4)
# Inverse FFT inputs # Inverse FFT inputs
inputs_val = np.random.random((1, N, N // 2 + 1, 2)).astype('float32') inputs_val = np.random.random((1, N, N // 2 + 1, 2)).astype('float32')
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
inputs_ref = inputs_val[:, :, :, 0] + 1j * inputs_val[:, :, :, 1] inputs_ref = inputs_val[:, :, :, 0] + 1j * inputs_val[:, :, :, 1]
# Unitary normalization inverse FFT # Unitary normalization inverse FFT
irfft = theano.gpuarray.fft.cuirfft(inputs, norm='ortho') irfft = theano.gpuarray.fft.cuirfft(inputs, norm='ortho')
f_irfft = theano.function([], irfft, mode=mode_with_gpu) f_irfft = theano.function([], irfft, mode=mode_with_gpu)
res_irfft = f_irfft() res_irfft = f_irfft()
irfft_ref_ortho = numpy.fft.irfftn(inputs_ref, axes=(1,2), norm='ortho') irfft_ref_ortho = numpy.fft.irfftn(
inputs_ref, axes=(1, 2), norm='ortho')
utt.assert_allclose(irfft_ref_ortho, utt.assert_allclose(irfft_ref_ortho,
res_irfft, atol=1e-4, rtol=1e-4) res_irfft, atol=1e-4, rtol=1e-4)
# No normalization inverse FFT # No normalization inverse FFT
irfft = theano.gpuarray.fft.cuirfft(inputs, norm='no_norm') irfft = theano.gpuarray.fft.cuirfft(inputs, norm='no_norm')
f_irfft = theano.function([], irfft, mode=mode_with_gpu) f_irfft = theano.function([], irfft, mode=mode_with_gpu)
res_irfft = f_irfft() res_irfft = f_irfft()
utt.assert_allclose(irfft_ref_ortho * np.sqrt(N*N), utt.assert_allclose(irfft_ref_ortho * np.sqrt(N * N),
res_irfft, atol=1e-4, rtol=1e-4) res_irfft, atol=1e-4, rtol=1e-4)
def test_grad(self): def test_grad(self):
# The numerical gradient of the FFT is sensitive, must set large # The numerical gradient of the FFT is sensitive, must set large
# enough epsilon to get good accuracy. # enough epsilon to get good accuracy.
eps = 1e-1 eps = 1e-1
def f_rfft(inp): def f_rfft(inp):
return theano.gpuarray.fft.curfft(inp) return theano.gpuarray.fft.curfft(inp)
inputs_val = np.random.random((1, N, N)).astype('float32') inputs_val = np.random.random((1, N, N)).astype('float32')
utt.verify_grad(f_rfft, [inputs_val], eps=eps) utt.verify_grad(f_rfft, [inputs_val], eps=eps)
def f_irfft(inp): def f_irfft(inp):
return theano.gpuarray.fft.cuirfft(inp) return theano.gpuarray.fft.cuirfft(inp)
inputs_val = np.random.random((1, N, N // 2 + 1, 2)).astype('float32') inputs_val = np.random.random((1, N, N // 2 + 1, 2)).astype('float32')
utt.verify_grad(f_irfft, [inputs_val], eps=eps) utt.verify_grad(f_irfft, [inputs_val], eps=eps)
def f_rfft(inp): def f_rfft(inp):
return theano.gpuarray.fft.curfft(inp, norm='ortho') return theano.gpuarray.fft.curfft(inp, norm='ortho')
inputs_val = np.random.random((1, N, N)).astype('float32') inputs_val = np.random.random((1, N, N)).astype('float32')
utt.verify_grad(f_rfft, [inputs_val], eps=eps) utt.verify_grad(f_rfft, [inputs_val], eps=eps)
def f_irfft(inp): def f_irfft(inp):
return theano.gpuarray.fft.cuirfft(inp, norm='no_norm') return theano.gpuarray.fft.cuirfft(inp, norm='no_norm')
inputs_val = np.random.random((1, N, N // 2 + 1, 2)).astype('float32') inputs_val = np.random.random((1, N, N // 2 + 1, 2)).astype('float32')
utt.verify_grad(f_irfft, [inputs_val], eps=eps) utt.verify_grad(f_irfft, [inputs_val], eps=eps)
def test_odd(self): def test_odd(self):
M = N - 1 M = N - 1
inputs_val = np.random.random((1, M, M)).astype('float32') inputs_val = np.random.random((1, M, M)).astype('float32')
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
rfft = theano.gpuarray.fft.curfft(inputs) rfft = theano.gpuarray.fft.curfft(inputs)
f_rfft = theano.function([], rfft, mode=mode_with_gpu) f_rfft = theano.function([], rfft, mode=mode_with_gpu)
res_rfft = f_rfft() res_rfft = f_rfft()
res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) + res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
1j * np.asarray(res_rfft[:, :, :, 1])) 1j * np.asarray(res_rfft[:, :, :, 1]))
rfft_ref = numpy.fft.rfftn(inputs_val, s=(M,M), axes=(1,2))#, s=(M, M), axes=(1,2)) rfft_ref = numpy.fft.rfftn(inputs_val, s=(M, M), axes=(1, 2))
utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4) utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4)
m = rfft.type() m = rfft.type()
ifft = theano.gpuarray.fft.cuirfft(m, is_odd=True) ifft = theano.gpuarray.fft.cuirfft(m, is_odd=True)
f_ifft = theano.function([m], ifft, mode=mode_with_gpu) f_ifft = theano.function([m], ifft, mode=mode_with_gpu)
res_ifft = f_ifft(res_rfft) res_ifft = f_ifft(res_rfft)
utt.assert_allclose(inputs_val, np.asarray(res_ifft)) utt.assert_allclose(inputs_val, np.asarray(res_ifft))
inputs_val = np.random.random((1, M, M//2+1, 2)).astype('float32') inputs_val = np.random.random((1, M, M // 2 + 1, 2)).astype('float32')
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
irfft = theano.gpuarray.fft.cuirfft(inputs, norm='ortho', is_odd=True) irfft = theano.gpuarray.fft.cuirfft(inputs, norm='ortho', is_odd=True)
f_irfft = theano.function([], irfft, mode=mode_with_gpu) f_irfft = theano.function([], irfft, mode=mode_with_gpu)
res_irfft = f_irfft() res_irfft = f_irfft()
inputs_ref = inputs_val[:, :, :, 0] + 1j * inputs_val[:, :, :, 1] inputs_ref = inputs_val[:, :, :, 0] + 1j * inputs_val[:, :, :, 1]
irfft_ref = numpy.fft.irfftn(inputs_ref, s=(M, M), axes=(1,2), norm='ortho') irfft_ref = numpy.fft.irfftn(
inputs_ref, s=(M, M), axes=(1, 2), norm='ortho')
utt.assert_allclose(irfft_ref, res_irfft, atol=1e-4, rtol=1e-4) utt.assert_allclose(irfft_ref, res_irfft, atol=1e-4, rtol=1e-4)
# The numerical gradient of the FFT is sensitive, must set large # The numerical gradient of the FFT is sensitive, must set large
# enough epsilon to get good accuracy. # enough epsilon to get good accuracy.
eps = 1e-1 eps = 1e-1
def f_rfft(inp): def f_rfft(inp):
return theano.gpuarray.fft.curfft(inp) return theano.gpuarray.fft.curfft(inp)
inputs_val = np.random.random((1, M, M)).astype('float32') inputs_val = np.random.random((1, M, M)).astype('float32')
utt.verify_grad(f_rfft, [inputs_val], eps=eps) utt.verify_grad(f_rfft, [inputs_val], eps=eps)
def f_irfft(inp): def f_irfft(inp):
return theano.gpuarray.fft.cuirfft(inp, is_odd=True) return theano.gpuarray.fft.cuirfft(inp, is_odd=True)
inputs_val = np.random.random((1, M, M // 2 + 1, 2)).astype('float32') inputs_val = np.random.random((1, M, M // 2 + 1, 2)).astype('float32')
utt.verify_grad(f_irfft, [inputs_val], eps=eps) utt.verify_grad(f_irfft, [inputs_val], eps=eps)
def f_rfft(inp): def f_rfft(inp):
return theano.gpuarray.fft.curfft(inp, norm='ortho') return theano.gpuarray.fft.curfft(inp, norm='ortho')
inputs_val = np.random.random((1, M, M)).astype('float32') inputs_val = np.random.random((1, M, M)).astype('float32')
utt.verify_grad(f_rfft, [inputs_val], eps=eps) utt.verify_grad(f_rfft, [inputs_val], eps=eps)
def f_irfft(inp): def f_irfft(inp):
return theano.gpuarray.fft.cuirfft(inp, norm='no_norm', is_odd=True) return theano.gpuarray.fft.cuirfft(inp, norm='no_norm', is_odd=True)
inputs_val = np.random.random((1, M, M // 2 + 1, 2)).astype('float32') inputs_val = np.random.random((1, M, M // 2 + 1, 2)).astype('float32')
utt.verify_grad(f_irfft, [inputs_val], eps=eps) utt.verify_grad(f_irfft, [inputs_val], eps=eps)
def test_params(self):
inputs_val = np.random.random((1, N)).astype('float32')
inputs = theano.shared(inputs_val)
with self.assertRaises(ValueError):
theano.gpuarray.fft.curfft(inputs, norm=123)
inputs_val = np.random.random((1, N // 2 + 1, 2)).astype('float32')
inputs = theano.shared(inputs_val)
with self.assertRaises(ValueError):
theano.gpuarray.fft.cuirfft(inputs, norm=123)
with self.assertRaises(ValueError):
theano.gpuarray.fft.cuirfft(inputs, is_odd=123)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论