Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
46cf0822
提交
46cf0822
authored
5月 20, 2016
作者:
slefrancois
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
gpuarray fft: guard pycuda import, add unit test, change interface to curfft
上级
04183580
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
86 行增加
和
60 行删除
+86
-60
fft.py
theano/gpuarray/fft.py
+63
-37
test_fft.py
theano/gpuarray/tests/test_fft.py
+23
-23
没有找到文件。
theano/gpuarray/fft.py
浏览文件 @
46cf0822
...
...
@@ -3,6 +3,7 @@ from __future__ import absolute_import, print_function, division
import
numpy
as
np
import
theano
from
theano
import
Op
import
theano.tensor
as
T
from
theano.gpuarray
import
(
basic_ops
,
GpuArrayType
)
...
...
@@ -12,7 +13,11 @@ try:
except
ImportError
:
pygpu_available
=
False
import
pycuda.driver
try
:
import
pycuda.driver
pycuda_available
=
True
except
ImportError
:
pycuda_available
=
False
try
:
import
scikits.cuda
...
...
@@ -22,21 +27,21 @@ except (ImportError, Exception):
scikits_cuda_available
=
False
class
CuFFTOp
(
Op
):
class
Cu
R
FFTOp
(
Op
):
"""
Performs a fast Fourier transform on the GPU using the scikits CUDA FFT
through the gpuarray backend.
Operator for the fast Fourier transform of a real-valued input on the GPU
using the scikits CUDA FFT
through the gpuarray backend.
The input must be a float32 variable of dimensions (m, n). It
The input must be a
real-valued
float32 variable of dimensions (m, n). It
performs m 1-D FFTs of size n each.
The output is a GpuArray of dimensions (m, n/2+1, 2). The output contains
the n/2+1 non-trivial elements of the m real-valued FFTs. The real
and imaginary parts stored as two float32 arrays, emulating complex64.
the n//2+1 non-trivial elements of the m real-valued FFTs. The real
and imaginary parts
are
stored as two float32 arrays, emulating complex64.
Since theano does not support complex number operations, care must be
taken to manually implement operators such as multiplication.
The module provides the convenience function cufft(input).
The module provides the convenience function cu
r
fft(input).
"""
__props__
=
()
...
...
@@ -54,6 +59,9 @@ class CuFFTOp(Op):
if
not
pygpu_available
:
raise
RuntimeError
(
"pygpu is needed for CuFFTOp"
)
if
not
pycuda_available
:
raise
RuntimeError
(
"pycuda is needed for CuFFTOp"
)
inp
=
basic_ops
.
gpu_contiguous
(
basic_ops
.
as_gpuarray_variable
(
inp
,
basic_ops
.
infer_context_name
(
inp
)))
...
...
@@ -121,21 +129,11 @@ class CuFFTOp(Op):
return
thunk
cufft
=
CuFFTOp
()
"""
Convenience function for CuFFTOp.
Parameters
----------
input
Array of float32 of size (m, n), containing m inputs of length n.
"""
class
CuIFFTOp
(
Op
):
class
CuIRFFTOp
(
Op
):
"""
Performs an inverse fast Fourier transform on the GPU using the
scikits CUDA FFT through the gpuarray backend.
Operator for the inverse fast Fourier transform with real-valued output
on the GPU using the
scikits CUDA FFT through the gpuarray backend.
The input is a variable of dimensions (m, n/2+1, 2) with
type float32 representing the n/2+1 non-trivial elements of m
...
...
@@ -143,11 +141,11 @@ class CuIFFTOp(Op):
parts are stored as two float32 arrays, emulating complex64 given that
Theano does not support complex numbers.
The output is a
float32 variable of dimensions (m, n) giving the m
inverse FFTs. *The output is NOT normalized*. You can manually divide
by the size of the output array to normalize.
The output is a
real-valued float32 variable of dimensions (m, n)
giving the m inverse FFTs. *The output is NOT normalized*. You can
manually divide
by the size of the output array to normalize.
The module provides the convenience function cuifft(input).
The module provides the convenience function cui
r
fft(input).
"""
__props__
=
()
...
...
@@ -160,11 +158,14 @@ class CuIFFTOp(Op):
def
make_node
(
self
,
inp
):
if
not
scikits_cuda_available
:
raise
RuntimeError
(
"scikits.cuda is needed for CuFFTOp"
)
raise
RuntimeError
(
"scikits.cuda is needed for Cu
I
FFTOp"
)
if
not
pygpu_available
:
raise
RuntimeError
(
"pygpu is needed for CuFFTOp"
)
# inp = as_gpuarray_variable(inp)
raise
RuntimeError
(
"pygpu is needed for CuIFFTOp"
)
if
not
pycuda_available
:
raise
RuntimeError
(
"pycuda is needed for CuIFFTOp"
)
inp
=
basic_ops
.
gpu_contiguous
(
basic_ops
.
as_gpuarray_variable
(
inp
,
basic_ops
.
infer_context_name
(
inp
)))
...
...
@@ -221,6 +222,10 @@ class CuIFFTOp(Op):
output_pycuda
.
sync
()
fft
.
ifft
(
input_pycuda
,
output_pycuda
,
plan
[
0
])
# strangely enough, enabling rescaling here makes it run
# very, very slowly. so do this rescaling manually
# afterwards!
# Sync results to ensure output contains completed computation
pycuda
.
driver
.
Context
.
synchronize
()
...
...
@@ -230,13 +235,34 @@ class CuIFFTOp(Op):
return
thunk
cuifft
=
CuIFFTOp
()
"""
Convenience function for CuIFFTOp.
Parameters
----------
input
Array of float32 of size (m, n/2+1, 2), containing m inputs with n/2+1
non-trivial elements and real and imaginary parts stored as separate arrays.
"""
def
curfft
(
inputs
):
"""
Performs the real unitary fast Fourier Transform normalized
by :math:`\sqrt n`.
Parameters
----------
inputs
Array of real-valued float32 of size (m, n), containing m inputs of
length n.
"""
fft_op
=
CuRFFTOp
()
return
fft_op
(
inputs
)
/
T
.
sqrt
(((
inputs
.
shape
[
1
:])
.
prod
())
.
astype
(
'float32'
))
def
cuirfft
(
inputs
):
"""
Performs the real unitary fast inverse Fourier Transform normalized
by :math:`\sqrt n`.
Parameters
----------
inputs
Array of float32 of size (m, n/2+1, 2), containing m inputs with n/2+1
non-trivial elements and real and imaginary parts stored as separate
arrays.
"""
ifft_op
=
CuIRFFTOp
()
return
ifft_op
(
inputs
)
/
T
.
sqrt
((((
inputs
.
shape
[
1
:
-
1
]
-
1
)
*
2
)
.
prod
())
.
astype
(
'float32'
))
theano/gpuarray/tests/test_fft.py
浏览文件 @
46cf0822
...
...
@@ -3,58 +3,58 @@ import unittest
import
numpy
as
np
import
theano
import
theano.tensor
from
theano.tests
import
unittest_tools
as
utt
import
theano.gpuarray.fft
import
numpy.fft
from
.config
import
mode_with_gpu
# Skip tests if pygpu is not available.
from
nose.plugins.skip
import
SkipTest
from
theano.gpuarray.fft
import
pygpu_available
,
scikits_cuda_available
from
theano.gpuarray.fft
import
pycuda_available
if
not
pygpu_available
:
# noqa
raise
SkipTest
(
'Optional package pygpu not available'
)
if
not
scikits_cuda_available
:
# noqa
raise
SkipTest
(
'Optional package scikits.cuda not available'
)
import
theano.gpuarray.fft
import
theano.tensor.fourier
from
.config
import
mode_with_gpu
if
not
pycuda_available
:
# noqa
raise
SkipTest
(
'Optional package pycuda not available'
)
class
TestFFT
(
unittest
.
TestCase
):
def
test_fft
(
self
):
def
test_
r
fft
(
self
):
N
=
64
inputs_val
=
np
.
random
.
random
((
1
,
N
))
.
astype
(
'float32'
)
inputs
=
theano
.
shared
(
inputs_val
)
fft_ref
=
theano
.
tensor
.
fourier
.
fft
(
inputs
,
N
,
1
)
fft
=
theano
.
gpuarray
.
fft
.
cufft
(
inputs
)
f_ref
=
theano
.
function
([],
fft_ref
)
f_fft
=
theano
.
function
([],
fft
,
mode
=
mode_with_gpu
)
res_ref
=
f_ref
()
res_fft
=
f_fft
()
rfft
=
theano
.
gpuarray
.
fft
.
curfft
(
inputs
)
f_rfft
=
theano
.
function
([],
rfft
,
mode
=
mode_with_gpu
)
res_rfft
=
f_rfft
()
res_rfft_comp
=
(
np
.
asarray
(
res_rfft
[:,
:,
0
])
+
1
j
*
np
.
asarray
(
res_rfft
[:,
:,
1
]))
res_fft_comp
=
(
np
.
asarray
(
res_fft
[:,
:,
0
])
+
1
j
*
np
.
asarray
(
res_fft
[:,
:,
1
]))
rfft_ref
=
numpy
.
fft
.
rfft
(
inputs_val
,
N
,
1
,
norm
=
'ortho'
)
utt
.
assert_allclose
(
r
es_ref
[
0
][
0
:
N
/
2
+
1
],
res_
fft_comp
)
utt
.
assert_allclose
(
r
fft_ref
,
res_r
fft_comp
)
def
test_ifft
(
self
):
def
test_i
r
fft
(
self
):
N
=
64
inputs_val
=
np
.
random
.
random
((
1
,
N
))
.
astype
(
'float32'
)
inputs
=
theano
.
shared
(
inputs_val
)
fft
=
theano
.
gpuarray
.
fft
.
cufft
(
inputs
)
fft
=
theano
.
gpuarray
.
fft
.
cu
r
fft
(
inputs
)
f_fft
=
theano
.
function
([],
fft
,
mode
=
mode_with_gpu
)
res_fft
=
f_fft
()
m
=
fft
.
type
()
ifft
=
theano
.
gpuarray
.
fft
.
cuifft
(
m
)
ifft
=
theano
.
gpuarray
.
fft
.
cui
r
fft
(
m
)
f_ifft
=
theano
.
function
([
m
],
ifft
,
mode
=
mode_with_gpu
)
res_ifft
=
f_ifft
(
res_fft
)
utt
.
assert_allclose
(
inputs_val
,
np
.
asarray
(
res_ifft
)
/
N
)
utt
.
assert_allclose
(
inputs_val
,
np
.
asarray
(
res_ifft
))
def
test_type
(
self
):
N
=
64
...
...
@@ -62,6 +62,6 @@ class TestFFT(unittest.TestCase):
inputs
=
theano
.
shared
(
inputs_val
)
with
self
.
assertRaises
(
AssertionError
):
theano
.
gpuarray
.
fft
.
cufft
(
inputs
)
theano
.
gpuarray
.
fft
.
cu
r
fft
(
inputs
)
with
self
.
assertRaises
(
AssertionError
):
theano
.
gpuarray
.
fft
.
cuifft
(
inputs
)
theano
.
gpuarray
.
fft
.
cui
r
fft
(
inputs
)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论