提交 0a889321 authored 作者: slefrancois's avatar slefrancois

final fft ops take shape as input, interface passes input array shape + odd correction

上级 29271d0a
...@@ -38,7 +38,14 @@ class CuRFFTOp(Op): ...@@ -38,7 +38,14 @@ class CuRFFTOp(Op):
broadcastable=[False] * (inp.type.ndim + 1), broadcastable=[False] * (inp.type.ndim + 1),
context_name=inp.type.context_name) context_name=inp.type.context_name)
def make_node(self, inp): def make_node(self, inp, s=None):
# A shape parameter s can be provided as an input. For now this is used to
# manage odd transform sizes.
# Later this could be extended to handle padding and trunkation,
# following numpy's interface. However, cuFFT expects array that match
# the shape given to the plan, so padding will have to be done in the op.
# The effect of padding on gradients has yet to be investigated.
if not scikits_cuda_available: if not scikits_cuda_available:
raise RuntimeError("scikits.cuda is needed for CuFFTOp") raise RuntimeError("scikits.cuda is needed for CuFFTOp")
...@@ -52,9 +59,16 @@ class CuRFFTOp(Op): ...@@ -52,9 +59,16 @@ class CuRFFTOp(Op):
basic_ops.as_gpuarray_variable(inp, basic_ops.as_gpuarray_variable(inp,
basic_ops.infer_context_name(inp))) basic_ops.infer_context_name(inp)))
# If no shape is provided as input, default to input data shape.
if s is None:
s = inp.shape[1:]
s = T.as_tensor_variable(s)
assert inp.dtype == "float32" assert inp.dtype == "float32"
assert s.ndim == 1
assert 'int' in s.dtype
return theano.Apply(self, [inp], [self.output_type(inp)()]) return theano.Apply(self, [inp, s], [self.output_type(inp)()])
def make_thunk(self, node, storage_map, _, _2): def make_thunk(self, node, storage_map, _, _2):
...@@ -70,9 +84,13 @@ class CuRFFTOp(Op): ...@@ -70,9 +84,13 @@ class CuRFFTOp(Op):
def thunk(): def thunk():
input_shape = inputs[0][0].shape input_shape = inputs[0][0].shape
s = inputs[1][0]
# Since padding is not supported, assert s matches input shape.
assert (input_shape[1:] == s).all()
# construct output shape # construct output shape
output_shape = list(input_shape) output_shape = [input_shape[0]] + list(s)
# DFT of real input is symmetric, no need to store # DFT of real input is symmetric, no need to store
# redundant coefficients # redundant coefficients
output_shape[-1] = output_shape[-1] // 2 + 1 output_shape[-1] = output_shape[-1] // 2 + 1
...@@ -99,13 +117,15 @@ class CuRFFTOp(Op): ...@@ -99,13 +117,15 @@ class CuRFFTOp(Op):
# only initialise plan if necessary # only initialise plan if necessary
if plan[0] is None or plan_input_shape[0] != input_shape: if plan[0] is None or plan_input_shape[0] != input_shape:
plan_input_shape[0] = input_shape plan_input_shape[0] = input_shape
plan[0] = fft.Plan(input_shape[1:], np.float32, np.complex64, plan[0] = fft.Plan(s, np.float32, np.complex64,
batch=input_shape[0]) batch=input_shape[0])
# Sync GPU variables before computation # Sync GPU variables before computation
input_pycuda.sync() input_pycuda.sync()
output_pycuda.sync() output_pycuda.sync()
fft.fft(input_pycuda, output_pycuda, plan[0]) fft.fft(input_pycuda, output_pycuda, plan[0])
# Sync results to ensure output contains completed computation # Sync results to ensure output contains completed computation
pycuda.driver.Context.synchronize() pycuda.driver.Context.synchronize()
...@@ -117,15 +137,18 @@ class CuRFFTOp(Op): ...@@ -117,15 +137,18 @@ class CuRFFTOp(Op):
def grad(self, inputs, output_grads): def grad(self, inputs, output_grads):
gout, = output_grads gout, = output_grads
s = inputs[0].shape[1:] s = inputs[1]
is_odd = s[-1] % 2
# Divide the last dimension of the output gradients by 2, they are # Divide the last dimension of the output gradients by 2, they are
# double-counted by the real-IFFT due to symmetry, except the first # double-counted by the real-IFFT due to symmetry, except the first
# and last elements (for even transforms) which are unique. # and last elements (for even transforms) which are unique.
idx = [slice(None)] * (gout.ndim - 2) \ idx = [slice(None)] * (gout.ndim - 2) \
+ [slice(1, (s[-1] // 2) + is_odd)] + [slice(None)] + [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(None)]
gout = T.set_subtensor(gout[idx], gout[idx]*0.5) gout = T.set_subtensor(gout[idx], gout[idx] * 0.5)
return [cuirfft_op(gout, is_odd)] return [cuirfft_op(gout, s), DisconnectedType()()]
def connection_pattern(self, node):
# Specificy that shape input parameter has no connection to graph and gradients.
return [[True], [False]]
curfft_op = CuRFFTOp() curfft_op = CuRFFTOp()
...@@ -135,12 +158,19 @@ class CuIRFFTOp(Op): ...@@ -135,12 +158,19 @@ class CuIRFFTOp(Op):
__props__ = () __props__ = ()
def output_type(self, inp): def output_type(self, inp):
# add one extra dim for real/imag # remove extra dim for real/imag
return GpuArrayType(inp.dtype, return GpuArrayType(inp.dtype,
broadcastable=[False] * (inp.type.ndim - 1), broadcastable=[False] * (inp.type.ndim - 1),
context_name=inp.type.context_name) context_name=inp.type.context_name)
def make_node(self, inp, is_odd): def make_node(self, inp, s=None):
# A shape parameter is expected as an input. For now this is used to
# manage odd transform sizes.
# Later this could be extended to handle padding and trunkation,
# following numpy's interface. However, cuFFT expects array that match
# the shape given to the plan, so padding will have to be done in the op.
# The effect of padding on gradients has yet to be investigated.
if not scikits_cuda_available: if not scikits_cuda_available:
raise RuntimeError("scikits.cuda is needed for CuIFFTOp") raise RuntimeError("scikits.cuda is needed for CuIFFTOp")
...@@ -153,12 +183,17 @@ class CuIRFFTOp(Op): ...@@ -153,12 +183,17 @@ class CuIRFFTOp(Op):
inp = basic_ops.gpu_contiguous( inp = basic_ops.gpu_contiguous(
basic_ops.as_gpuarray_variable(inp, basic_ops.as_gpuarray_variable(inp,
basic_ops.infer_context_name(inp))) basic_ops.infer_context_name(inp)))
is_odd = T.as_tensor_variable(is_odd)
# If no shape is provided as input, calculate shape assuming even real transform.
if s is None:
s = inp.shape[1:-1]
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2)
s = T.as_tensor_variable(s)
assert inp.dtype == "float32" assert inp.dtype == "float32"
assert 'int' in is_odd.dtype assert s.ndim == 1
return theano.Apply(self, [inp, is_odd], [self.output_type(inp)()]) return theano.Apply(self, [inp, s], [self.output_type(inp)()])
def make_thunk(self, node, storage_map, _, _2): def make_thunk(self, node, storage_map, _, _2):
...@@ -174,14 +209,16 @@ class CuIRFFTOp(Op): ...@@ -174,14 +209,16 @@ class CuIRFFTOp(Op):
def thunk(): def thunk():
input_shape = inputs[0][0].shape input_shape = inputs[0][0].shape
is_odd = inputs[1][0] s = inputs[1][0]
assert is_odd in (0, 1)
# Since padding is not supported, assert that last dimension corresponds to
# input forward transform size.
assert (input_shape[1:-2] == s[:-1]).all()
assert ((input_shape[-2] - 1) * 2 + s[-1] % 2 == s[-1]).all()
# construct output shape # construct output shape
# chop off the extra length-2 dimension for real/imag # chop off the extra length-2 dimension for real/imag
output_shape = list(input_shape[:-1]) output_shape = [input_shape[0]] + list(s)
# restore full signal length
output_shape[-1] = (output_shape[-1] - 1) * 2 + is_odd
output_shape = tuple(output_shape) output_shape = tuple(output_shape)
z = outputs[0] z = outputs[0]
...@@ -202,8 +239,9 @@ class CuIRFFTOp(Op): ...@@ -202,8 +239,9 @@ class CuIRFFTOp(Op):
# only initialise plan if necessary # only initialise plan if necessary
if plan[0] is None or plan_input_shape[0] != input_shape: if plan[0] is None or plan_input_shape[0] != input_shape:
plan_input_shape[0] = input_shape plan_input_shape[0] = input_shape
plan[0] = fft.Plan(output_shape[1:], np.complex64, np.float32, plan[0] = fft.Plan(s, np.complex64, np.float32,
batch=output_shape[0]) batch=output_shape[0])
# Sync GPU variables before computation # Sync GPU variables before computation
input_pycuda.sync() input_pycuda.sync()
output_pycuda.sync() output_pycuda.sync()
...@@ -224,21 +262,23 @@ class CuIRFFTOp(Op): ...@@ -224,21 +262,23 @@ class CuIRFFTOp(Op):
def grad(self, inputs, output_grads): def grad(self, inputs, output_grads):
gout, = output_grads gout, = output_grads
s = gout.shape s = inputs[1]
gf = curfft_op(gout) gf = curfft_op(gout, s)
# Multiply the last dimension of the gradient by 2, they represent # Multiply the last dimension of the gradient by 2, they represent
# both positive and negative frequencies, except the first # both positive and negative frequencies, except the first
# and last elements (for even transforms) which are unique. # and last elements (for even transforms) which are unique.
idx = [slice(None)] * (gf.ndim - 2) \ idx = [slice(None)] * (gf.ndim - 2) \
+ [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(None)] + [slice(1, (s[-1] // 2) + (s[-1] % 2))] + [slice(None)]
gf = T.set_subtensor(gf[idx], gf[idx]*2) gf = T.set_subtensor(gf[idx], gf[idx] * 2)
return [gf, DisconnectedType()()] return [gf, DisconnectedType()()]
def connection_pattern(self, node): def connection_pattern(self, node):
return [[True],[False]] # Specificy that shape input parameter has no connection to graph and gradients.
return [[True], [False]]
cuirfft_op = CuIRFFTOp() cuirfft_op = CuIRFFTOp()
def curfft(inp, norm=None): def curfft(inp, norm=None):
""" """
Performs the fast Fourier transform of a real-valued output on the GPU Performs the fast Fourier transform of a real-valued output on the GPU
...@@ -269,14 +309,14 @@ def curfft(inp, norm=None): ...@@ -269,14 +309,14 @@ def curfft(inp, norm=None):
s = inp.shape[1:] s = inp.shape[1:]
cond_norm = _unitary(norm) cond_norm = _unitary(norm)
if cond_norm is None or cond_norm == "no_norm":
scaling = 1 scaling = 1
elif cond_norm == "ortho": if cond_norm == "ortho":
scaling = T.sqrt(s.prod().astype('float32')) scaling = T.sqrt(s.prod().astype('float32'))
return curfft_op(inp) / scaling return curfft_op(inp, s) / scaling
def cuirfft(inp, norm=None, is_odd=0): def cuirfft(inp, norm=None, is_odd=False):
""" """
Performs the real-valued output inverse Fourier Transform using the Performs the real-valued output inverse Fourier Transform using the
gpuarray backend. gpuarray backend.
...@@ -294,31 +334,36 @@ def cuirfft(inp, norm=None, is_odd=0): ...@@ -294,31 +334,36 @@ def cuirfft(inp, norm=None, is_odd=0):
---------- ----------
inp inp
Array of float32 of size (m, ..., n//2+1, 2), containing m inputs Array of float32 of size (m, ..., n//2+1, 2), containing m inputs
with n/2+1 non-trivial elements on the last dimension and real with n//2+1 non-trivial elements on the last dimension and real
and imaginary parts stored as separate arrays. and imaginary parts stored as separate arrays.
norm : {None, 'ortho', 'no_norm'} norm : {None, 'ortho', 'no_norm'}
Normalization of transform. Following numpy, default *None* normalizes Normalization of transform. Following numpy, default *None* normalizes
only the inverse transform by n, 'ortho' yields the unitary transform only the inverse transform by n, 'ortho' yields the unitary transform
(:math:`1/\sqrt n` forward and inverse). In addition, 'no_norm' leaves (:math:`1/\sqrt n` forward and inverse). In addition, 'no_norm' leaves
the transform unnormalized. the transform unnormalized.
is_odd : {True, False}
Set to True to get a real inverse transform output with an odd last dimension
of length (N-1)*2 + 1 for an input last dimension of length N.
""" """
if is_odd != 0: if is_odd not in (True, False):
is_odd = 1 raise ValueError("Invalid value %s for id_odd, must be True or False" % is_odd)
s = inp.shape[1:-1] s = inp.shape[1:-1]
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2 + is_odd) if is_odd:
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2 + 1)
else:
s = T.set_subtensor(s[-1], (s[-1] - 1) * 2)
cond_norm = _unitary(norm) cond_norm = _unitary(norm)
scaling = 1
if cond_norm is None: if cond_norm is None:
scaling = s.prod().astype('float32') scaling = s.prod().astype('float32')
if cond_norm == "ortho": elif cond_norm == "ortho":
scaling = T.sqrt(s.prod().astype('float32')) scaling = T.sqrt(s.prod().astype('float32'))
if cond_norm == "no_norm":
scaling = 1
return cuirfft_op(inp, is_odd) / scaling return cuirfft_op(inp, s) / scaling
def _unitary(norm): def _unitary(norm):
if norm not in (None, "ortho", "no_norm"): if norm not in (None, "ortho", "no_norm"):
......
...@@ -25,14 +25,13 @@ if not pycuda_available: # noqa ...@@ -25,14 +25,13 @@ if not pycuda_available: # noqa
import theano.gpuarray.cuda_fft import theano.gpuarray.cuda_fft
# Transform sizes # Transform sizes
N = 16 N = 64
class TestFFT(unittest.TestCase): class TestFFT(unittest.TestCase):
def test_1Dfft(self): def test_1Dfft(self):
inputs_val = np.random.random((1, N)).astype('float32') inputs_val = np.random.random((1, N)).astype('float32')
inputs = theano.shared(inputs_val)
x = T.matrix('x', dtype='float32') x = T.matrix('x', dtype='float32')
rfft = theano.gpuarray.fft.curfft(x) rfft = theano.gpuarray.fft.curfft(x)
...@@ -63,7 +62,7 @@ class TestFFT(unittest.TestCase): ...@@ -63,7 +62,7 @@ class TestFFT(unittest.TestCase):
def f_irfft(inp): def f_irfft(inp):
return theano.gpuarray.fft.cuirfft(inp) return theano.gpuarray.fft.cuirfft(inp)
inputs_val = np.random.random((1, N//2+1, 2)).astype('float32') inputs_val = np.random.random((1, N // 2 + 1, 2)).astype('float32')
utt.verify_grad(f_irfft, [inputs_val], eps=eps) utt.verify_grad(f_irfft, [inputs_val], eps=eps)
def test_rfft(self): def test_rfft(self):
...@@ -76,7 +75,7 @@ class TestFFT(unittest.TestCase): ...@@ -76,7 +75,7 @@ class TestFFT(unittest.TestCase):
res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) + res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
1j * np.asarray(res_rfft[:, :, :, 1])) 1j * np.asarray(res_rfft[:, :, :, 1]))
rfft_ref = numpy.fft.rfftn(inputs_val, axes=(1,2)) rfft_ref = numpy.fft.rfftn(inputs_val, axes=(1, 2))
utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4) utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4)
...@@ -115,7 +114,7 @@ class TestFFT(unittest.TestCase): ...@@ -115,7 +114,7 @@ class TestFFT(unittest.TestCase):
res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) + res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
1j * np.asarray(res_rfft[:, :, :, 1])) 1j * np.asarray(res_rfft[:, :, :, 1]))
rfft_ref_ortho = numpy.fft.rfftn(inputs_val, axes=(1,2), norm='ortho') rfft_ref_ortho = numpy.fft.rfftn(inputs_val, axes=(1, 2), norm='ortho')
utt.assert_allclose(rfft_ref_ortho, res_rfft_comp, utt.assert_allclose(rfft_ref_ortho, res_rfft_comp,
atol=1e-4, rtol=1e-4) atol=1e-4, rtol=1e-4)
...@@ -127,7 +126,7 @@ class TestFFT(unittest.TestCase): ...@@ -127,7 +126,7 @@ class TestFFT(unittest.TestCase):
res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) + res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
1j * np.asarray(res_rfft[:, :, :, 1])) 1j * np.asarray(res_rfft[:, :, :, 1]))
utt.assert_allclose(rfft_ref_ortho * np.sqrt(N*N), utt.assert_allclose(rfft_ref_ortho * np.sqrt(N * N),
res_rfft_comp, atol=1e-4, rtol=1e-4) res_rfft_comp, atol=1e-4, rtol=1e-4)
# Inverse FFT inputs # Inverse FFT inputs
...@@ -140,7 +139,8 @@ class TestFFT(unittest.TestCase): ...@@ -140,7 +139,8 @@ class TestFFT(unittest.TestCase):
f_irfft = theano.function([], irfft, mode=mode_with_gpu) f_irfft = theano.function([], irfft, mode=mode_with_gpu)
res_irfft = f_irfft() res_irfft = f_irfft()
irfft_ref_ortho = numpy.fft.irfftn(inputs_ref, axes=(1,2), norm='ortho') irfft_ref_ortho = numpy.fft.irfftn(
inputs_ref, axes=(1, 2), norm='ortho')
utt.assert_allclose(irfft_ref_ortho, utt.assert_allclose(irfft_ref_ortho,
res_irfft, atol=1e-4, rtol=1e-4) res_irfft, atol=1e-4, rtol=1e-4)
...@@ -150,7 +150,7 @@ class TestFFT(unittest.TestCase): ...@@ -150,7 +150,7 @@ class TestFFT(unittest.TestCase):
f_irfft = theano.function([], irfft, mode=mode_with_gpu) f_irfft = theano.function([], irfft, mode=mode_with_gpu)
res_irfft = f_irfft() res_irfft = f_irfft()
utt.assert_allclose(irfft_ref_ortho * np.sqrt(N*N), utt.assert_allclose(irfft_ref_ortho * np.sqrt(N * N),
res_irfft, atol=1e-4, rtol=1e-4) res_irfft, atol=1e-4, rtol=1e-4)
def test_grad(self): def test_grad(self):
...@@ -191,7 +191,7 @@ class TestFFT(unittest.TestCase): ...@@ -191,7 +191,7 @@ class TestFFT(unittest.TestCase):
res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) + res_rfft_comp = (np.asarray(res_rfft[:, :, :, 0]) +
1j * np.asarray(res_rfft[:, :, :, 1])) 1j * np.asarray(res_rfft[:, :, :, 1]))
rfft_ref = numpy.fft.rfftn(inputs_val, s=(M,M), axes=(1,2))#, s=(M, M), axes=(1,2)) rfft_ref = numpy.fft.rfftn(inputs_val, s=(M, M), axes=(1, 2))
utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4) utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4)
...@@ -202,7 +202,7 @@ class TestFFT(unittest.TestCase): ...@@ -202,7 +202,7 @@ class TestFFT(unittest.TestCase):
utt.assert_allclose(inputs_val, np.asarray(res_ifft)) utt.assert_allclose(inputs_val, np.asarray(res_ifft))
inputs_val = np.random.random((1, M, M//2+1, 2)).astype('float32') inputs_val = np.random.random((1, M, M // 2 + 1, 2)).astype('float32')
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
irfft = theano.gpuarray.fft.cuirfft(inputs, norm='ortho', is_odd=True) irfft = theano.gpuarray.fft.cuirfft(inputs, norm='ortho', is_odd=True)
...@@ -210,7 +210,8 @@ class TestFFT(unittest.TestCase): ...@@ -210,7 +210,8 @@ class TestFFT(unittest.TestCase):
res_irfft = f_irfft() res_irfft = f_irfft()
inputs_ref = inputs_val[:, :, :, 0] + 1j * inputs_val[:, :, :, 1] inputs_ref = inputs_val[:, :, :, 0] + 1j * inputs_val[:, :, :, 1]
irfft_ref = numpy.fft.irfftn(inputs_ref, s=(M, M), axes=(1,2), norm='ortho') irfft_ref = numpy.fft.irfftn(
inputs_ref, s=(M, M), axes=(1, 2), norm='ortho')
utt.assert_allclose(irfft_ref, res_irfft, atol=1e-4, rtol=1e-4) utt.assert_allclose(irfft_ref, res_irfft, atol=1e-4, rtol=1e-4)
...@@ -237,3 +238,16 @@ class TestFFT(unittest.TestCase): ...@@ -237,3 +238,16 @@ class TestFFT(unittest.TestCase):
return theano.gpuarray.fft.cuirfft(inp, norm='no_norm', is_odd=True) return theano.gpuarray.fft.cuirfft(inp, norm='no_norm', is_odd=True)
inputs_val = np.random.random((1, M, M // 2 + 1, 2)).astype('float32') inputs_val = np.random.random((1, M, M // 2 + 1, 2)).astype('float32')
utt.verify_grad(f_irfft, [inputs_val], eps=eps) utt.verify_grad(f_irfft, [inputs_val], eps=eps)
def test_params(self):
inputs_val = np.random.random((1, N)).astype('float32')
inputs = theano.shared(inputs_val)
with self.assertRaises(ValueError):
theano.gpuarray.fft.curfft(inputs, norm=123)
inputs_val = np.random.random((1, N // 2 + 1, 2)).astype('float32')
inputs = theano.shared(inputs_val)
with self.assertRaises(ValueError):
theano.gpuarray.fft.cuirfft(inputs, norm=123)
with self.assertRaises(ValueError):
theano.gpuarray.fft.cuirfft(inputs, is_odd=123)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论