提交 46cf0822 authored 作者: slefrancois's avatar slefrancois

gpuarray fft: guard pycuda import, add unit test, change interface to curfft

上级 04183580
...@@ -3,6 +3,7 @@ from __future__ import absolute_import, print_function, division ...@@ -3,6 +3,7 @@ from __future__ import absolute_import, print_function, division
import numpy as np import numpy as np
import theano import theano
from theano import Op from theano import Op
import theano.tensor as T
from theano.gpuarray import (basic_ops, GpuArrayType) from theano.gpuarray import (basic_ops, GpuArrayType)
...@@ -12,7 +13,11 @@ try: ...@@ -12,7 +13,11 @@ try:
except ImportError: except ImportError:
pygpu_available = False pygpu_available = False
import pycuda.driver try:
import pycuda.driver
pycuda_available = True
except ImportError:
pycuda_available = False
try: try:
import scikits.cuda import scikits.cuda
...@@ -22,21 +27,21 @@ except (ImportError, Exception): ...@@ -22,21 +27,21 @@ except (ImportError, Exception):
scikits_cuda_available = False scikits_cuda_available = False
class CuFFTOp(Op): class CuRFFTOp(Op):
""" """
Performs a fast Fourier transform on the GPU using the scikits CUDA FFT Operator for the fast Fourier transform of a real-valued output on the GPU
through the gpuarray backend. using the scikits CUDA FFT through the gpuarray backend.
The input must be a float32 variable of dimensions (m, n). It The input must be a real-valued float32 variable of dimensions (m, n). It
performs m 1-D FFTs of size n each. performs m 1-D FFTs of size n each.
The output is a GpuArray of dimensions (m, n/2+1, 2). The output contains The output is a GpuArray of dimensions (m, n/2+1, 2). The output contains
the n/2+1 non-trivial elements of the m real-valued FFTs. The real the n//2+1 non-trivial elements of the m real-valued FFTs. The real
and imaginary parts stored as two float32 arrays, emulating complex64. and imaginary parts are stored as two float32 arrays, emulating complex64.
Since theano does not support complex number operations, care must be Since theano does not support complex number operations, care must be
taken to manually implement operators such as multiplication. taken to manually implement operators such as multiplication.
The module provides the convenience function cufft(input). The module provides the convenience function curfft(input).
""" """
__props__ = () __props__ = ()
...@@ -54,6 +59,9 @@ class CuFFTOp(Op): ...@@ -54,6 +59,9 @@ class CuFFTOp(Op):
if not pygpu_available: if not pygpu_available:
raise RuntimeError("pygpu is needed for CuFFTOp") raise RuntimeError("pygpu is needed for CuFFTOp")
if not pycuda_available:
raise RuntimeError("pycuda is needed for CuFFTOp")
inp = basic_ops.gpu_contiguous( inp = basic_ops.gpu_contiguous(
basic_ops.as_gpuarray_variable(inp, basic_ops.as_gpuarray_variable(inp,
basic_ops.infer_context_name(inp))) basic_ops.infer_context_name(inp)))
...@@ -121,21 +129,11 @@ class CuFFTOp(Op): ...@@ -121,21 +129,11 @@ class CuFFTOp(Op):
return thunk return thunk
cufft = CuFFTOp()
"""
Convenience function for CuFFTOp.
Parameters
----------
input
Array of float32 of size (m, n), containing m inputs of length n.
"""
class CuIFFTOp(Op): class CuIRFFTOp(Op):
""" """
Performs an inverse fast Fourier transform on the GPU using the Operator for the inverse fast Fourier transform with real-valued output
scikits CUDA FFT through the gpuarray backend. on the GPU using the scikits CUDA FFT through the gpuarray backend.
The input is a variable of dimensions (m, n/2+1, 2) with The input is a variable of dimensions (m, n/2+1, 2) with
type float32 representing the n/2+1 non-trivial elements of m type float32 representing the n/2+1 non-trivial elements of m
...@@ -143,11 +141,11 @@ class CuIFFTOp(Op): ...@@ -143,11 +141,11 @@ class CuIFFTOp(Op):
parts are stored as two float32 arrays, emulating complex64 given that parts are stored as two float32 arrays, emulating complex64 given that
Theano does not support complex numbers. Theano does not support complex numbers.
The output is a float32 variable of dimensions (m, n) giving the m The output is a real-valued float32 variable of dimensions (m, n)
inverse FFTs. *The output is NOT normalized*. You can manualy divide giving the m inverse FFTs. *The output is NOT normalized*. You can
by the size of the output array to normalize. manualy divide by the size of the output array to normalize.
The module provides the convenience function cuifft(input). The module provides the convenience function cuirfft(input).
""" """
__props__ = () __props__ = ()
...@@ -160,11 +158,14 @@ class CuIFFTOp(Op): ...@@ -160,11 +158,14 @@ class CuIFFTOp(Op):
def make_node(self, inp): def make_node(self, inp):
if not scikits_cuda_available: if not scikits_cuda_available:
raise RuntimeError("scikits.cuda is needed for CuFFTOp") raise RuntimeError("scikits.cuda is needed for CuIFFTOp")
if not pygpu_available: if not pygpu_available:
raise RuntimeError("pygpu is needed for CuFFTOp") raise RuntimeError("pygpu is needed for CuIFFTOp")
# inp = as_gpuarray_variable(inp)
if not pycuda_available:
raise RuntimeError("pycuda is needed for CuIFFTOp")
inp = basic_ops.gpu_contiguous( inp = basic_ops.gpu_contiguous(
basic_ops.as_gpuarray_variable(inp, basic_ops.as_gpuarray_variable(inp,
basic_ops.infer_context_name(inp))) basic_ops.infer_context_name(inp)))
...@@ -221,6 +222,10 @@ class CuIFFTOp(Op): ...@@ -221,6 +222,10 @@ class CuIFFTOp(Op):
output_pycuda.sync() output_pycuda.sync()
fft.ifft(input_pycuda, output_pycuda, plan[0]) fft.ifft(input_pycuda, output_pycuda, plan[0])
# strangely enough, enabling rescaling here makes it run
# very, very slowly. so do this rescaling manually
# afterwards!
# Sync results to ensure output contains completed computation # Sync results to ensure output contains completed computation
pycuda.driver.Context.synchronize() pycuda.driver.Context.synchronize()
...@@ -230,13 +235,34 @@ class CuIFFTOp(Op): ...@@ -230,13 +235,34 @@ class CuIFFTOp(Op):
return thunk return thunk
cuifft = CuIFFTOp()
"""
Convenience function for CuIFFTOp.
Parameters def curfft(inputs):
---------- """
input Performs the real unitary fast Fourier Transform normalized
by :math:`\sqrt n`.
Parameters
----------
inputs
Array of real-valued float32 of size (m, n), containing m inputs of
length n.
"""
fft_op = CuRFFTOp()
return fft_op(inputs) / T.sqrt(((inputs.shape[1:]).prod()).astype('float32'))
def cuirfft(inputs):
"""
Performs the real unitary fast inverse Fourier Transform normalized
by :math:`\sqrt n`.
Parameters
----------
inputs
Array of float32 of size (m, n/2+1, 2), containing m inputs with n/2+1 Array of float32 of size (m, n/2+1, 2), containing m inputs with n/2+1
non-trivial elements and real and imaginary parts stored as separate arrays. non-trivial elements and real and imaginary parts stored as separate
""" arrays.
"""
ifft_op = CuIRFFTOp()
return ifft_op(inputs) / T.sqrt((((inputs.shape[1:-1] - 1) * 2).prod())
.astype('float32'))
...@@ -3,58 +3,58 @@ import unittest ...@@ -3,58 +3,58 @@ import unittest
import numpy as np import numpy as np
import theano import theano
import theano.tensor
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
import theano.gpuarray.fft
import numpy.fft
from .config import mode_with_gpu
# Skip tests if pygpu is not available. # Skip tests if pygpu is not available.
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
from theano.gpuarray.fft import pygpu_available, scikits_cuda_available from theano.gpuarray.fft import pygpu_available, scikits_cuda_available
from theano.gpuarray.fft import pycuda_available
if not pygpu_available: # noqa if not pygpu_available: # noqa
raise SkipTest('Optional package pygpu not available') raise SkipTest('Optional package pygpu not available')
if not scikits_cuda_available: # noqa if not scikits_cuda_available: # noqa
raise SkipTest('Optional package scikits.cuda not available') raise SkipTest('Optional package scikits.cuda not available')
if not pycuda_available: # noqa
import theano.gpuarray.fft raise SkipTest('Optional package pycuda not available')
import theano.tensor.fourier
from .config import mode_with_gpu
class TestFFT(unittest.TestCase): class TestFFT(unittest.TestCase):
def test_fft(self): def test_rfft(self):
N = 64 N = 64
inputs_val = np.random.random((1, N)).astype('float32') inputs_val = np.random.random((1, N)).astype('float32')
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
fft_ref = theano.tensor.fourier.fft(inputs, N, 1) rfft = theano.gpuarray.fft.curfft(inputs)
fft = theano.gpuarray.fft.cufft(inputs) f_rfft = theano.function([], rfft, mode=mode_with_gpu)
res_rfft = f_rfft()
f_ref = theano.function([], fft_ref) res_rfft_comp = (np.asarray(res_rfft[:, :, 0]) +
f_fft = theano.function([], fft, mode=mode_with_gpu) 1j * np.asarray(res_rfft[:, :, 1]))
res_ref = f_ref()
res_fft = f_fft()
res_fft_comp = (np.asarray(res_fft[:, :, 0]) + rfft_ref = numpy.fft.rfft(inputs_val, N, 1, norm='ortho')
1j * np.asarray(res_fft[:, :, 1]))
utt.assert_allclose(res_ref[0][0:N / 2 + 1], res_fft_comp) utt.assert_allclose(rfft_ref, res_rfft_comp)
def test_ifft(self): def test_irfft(self):
N = 64 N = 64
inputs_val = np.random.random((1, N)).astype('float32') inputs_val = np.random.random((1, N)).astype('float32')
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
fft = theano.gpuarray.fft.cufft(inputs) fft = theano.gpuarray.fft.curfft(inputs)
f_fft = theano.function([], fft, mode=mode_with_gpu) f_fft = theano.function([], fft, mode=mode_with_gpu)
res_fft = f_fft() res_fft = f_fft()
m = fft.type() m = fft.type()
ifft = theano.gpuarray.fft.cuifft(m) ifft = theano.gpuarray.fft.cuirfft(m)
f_ifft = theano.function([m], ifft, mode=mode_with_gpu) f_ifft = theano.function([m], ifft, mode=mode_with_gpu)
res_ifft = f_ifft(res_fft) res_ifft = f_ifft(res_fft)
utt.assert_allclose(inputs_val, np.asarray(res_ifft) / N) utt.assert_allclose(inputs_val, np.asarray(res_ifft))
def test_type(self): def test_type(self):
N = 64 N = 64
...@@ -62,6 +62,6 @@ class TestFFT(unittest.TestCase): ...@@ -62,6 +62,6 @@ class TestFFT(unittest.TestCase):
inputs = theano.shared(inputs_val) inputs = theano.shared(inputs_val)
with self.assertRaises(AssertionError): with self.assertRaises(AssertionError):
theano.gpuarray.fft.cufft(inputs) theano.gpuarray.fft.curfft(inputs)
with self.assertRaises(AssertionError): with self.assertRaises(AssertionError):
theano.gpuarray.fft.cuifft(inputs) theano.gpuarray.fft.cuirfft(inputs)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论