Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
75550055
提交
75550055
authored
5月 28, 2014
作者:
Frédéric Bastien
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1870 from abergeron/cuda_fftconv
Cuda fftconv
上级
5f219fca
4cf06d2b
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
7 个修改的文件
包含
147 行增加
和
7 行删除
+147
-7
conv.txt
doc/library/tensor/nnet/conv.txt
+6
-0
pycuda_utils.py
theano/misc/pycuda_utils.py
+1
-1
__init__.py
theano/sandbox/cuda/__init__.py
+0
-1
fftconv.py
theano/sandbox/cuda/fftconv.py
+0
-0
opt.py
theano/sandbox/cuda/opt.py
+21
-1
test_fftconv.py
theano/sandbox/cuda/tests/test_fftconv.py
+113
-0
test_conv.py
theano/tensor/nnet/tests/test_conv.py
+6
-4
没有找到文件。
doc/library/tensor/nnet/conv.txt
浏览文件 @
75550055
...
@@ -27,6 +27,12 @@ TODO: Give examples for how to use these things! They are pretty complicated.
...
@@ -27,6 +27,12 @@ TODO: Give examples for how to use these things! They are pretty complicated.
- Conv implemented
- Conv implemented
- :func:`signal.conv2d <theano.tensor.signal.conv.conv2d>`.
- :func:`signal.conv2d <theano.tensor.signal.conv.conv2d>`.
- :func:`nnet.conv2d <theano.tensor.nnet.conv.conv2d>`.
- :func:`nnet.conv2d <theano.tensor.nnet.conv.conv2d>`.
- :func:`conv2d_fft <theano.sandbox.cuda.fftconv.conv2d_fft>`
This is a GPU-only version of conv2d that uses an FFT transform
to perform the work. You can enable it by setting
'THEANO_FLAGS=optimizer_including=conv_fft_valid:conv_fft_full'
in your environment. This is not enabled by default because it
has some restrictions on input and uses more memory.
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`. Doesn't work on the GPU.
- :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`. Doesn't work on the GPU.
- :func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>`
- :func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>`
Another conv3d implementation that uses the conv2d with data reshaping.
Another conv3d implementation that uses the conv2d with data reshaping.
...
...
theano/misc/pycuda_utils.py
浏览文件 @
75550055
import
numpy
import
numpy
import
pycuda.gpuarray
import
pycuda.gpuarray
import
theano.sandbox.cuda
as
cuda
from
theano.sandbox
import
cuda
if
cuda
.
cuda_available
==
False
:
if
cuda
.
cuda_available
==
False
:
raise
ImportError
(
'Optional theano package cuda disabled'
)
raise
ImportError
(
'Optional theano package cuda disabled'
)
...
...
theano/sandbox/cuda/__init__.py
浏览文件 @
75550055
...
@@ -33,7 +33,6 @@ AddConfigVar('cublas.lib',
...
@@ -33,7 +33,6 @@ AddConfigVar('cublas.lib',
"""Name of the cuda blas library for the linker."""
,
"""Name of the cuda blas library for the linker."""
,
StrParam
(
'cublas'
))
StrParam
(
'cublas'
))
#is_nvcc_available called here to initialize global vars in
#is_nvcc_available called here to initialize global vars in
#nvcc_compiler module
#nvcc_compiler module
nvcc_compiler
.
is_nvcc_available
()
nvcc_compiler
.
is_nvcc_available
()
...
...
theano/sandbox/cuda/fftconv.py
0 → 100644
浏览文件 @
75550055
差异被折叠。
点击展开。
theano/sandbox/cuda/opt.py
浏览文件 @
75550055
...
@@ -40,6 +40,7 @@ from theano.sandbox.cuda.elemwise import SupportCodeError
...
@@ -40,6 +40,7 @@ from theano.sandbox.cuda.elemwise import SupportCodeError
from
theano.scalar.basic_scipy
import
Erfinv
from
theano.scalar.basic_scipy
import
Erfinv
from
theano.sandbox.cuda.elemwise
import
erfinv_gpu
from
theano.sandbox.cuda.elemwise
import
erfinv_gpu
from
theano.sandbox.cuda.var
import
CudaNdarrayConstant
from
theano.sandbox.cuda.var
import
CudaNdarrayConstant
from
theano.sandbox.cuda.fftconv
import
conv2d_fft
from
theano.scan_module
import
scan_utils
,
scan_op
,
scan_opt
from
theano.scan_module
import
scan_utils
,
scan_op
,
scan_opt
from
theano.tensor.blas
import
_is_real_vector
,
_is_real_matrix
from
theano.tensor.blas
import
_is_real_vector
,
_is_real_matrix
linalg
=
None
linalg
=
None
...
@@ -1118,8 +1119,27 @@ def local_gpu_conv(node):
...
@@ -1118,8 +1119,27 @@ def local_gpu_conv(node):
# differently than the gpu ConvOp
# differently than the gpu ConvOp
return
[
out
]
return
[
out
]
import
theano.tensor.signal.downsample
as
downsample
@local_optimizer([GpuConv])
def local_conv_fft_valid(node):
    """Swap a 'valid', unit-subsample GpuConv node for conv2d_fft.

    The node is only handled when its op is a GpuConv whose border_mode is
    'valid' and whose subsample is (1, 1).  In that case a one-element list
    holding conv2d_fft applied to the node's first two inputs is returned.
    In every other case the function returns None.
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return None
    if op.border_mode != 'valid' or op.subsample != (1, 1):
        return None
    arg0, arg1 = node.inputs[0], node.inputs[1]
    return [conv2d_fft(arg0, arg1)]
@local_optimizer([GpuConv])
def local_conv_fft_full(node):
    """Swap a 'full', unit-subsample GpuConv node for conv2d_fft.

    The node is only handled when its op is a GpuConv whose border_mode is
    'full' and whose subsample is (1, 1).  In that case a one-element list
    holding conv2d_fft(..., border_mode='full') applied to the node's first
    two inputs is returned.  In every other case the function returns None.
    """
    op = node.op
    if not isinstance(op, GpuConv):
        return None
    if op.border_mode != 'full' or op.subsample != (1, 1):
        return None
    arg0, arg1 = node.inputs[0], node.inputs[1]
    return [conv2d_fft(arg0, arg1, border_mode='full')]
# Register both FFT convolution rewrites on gpu_optimizer under the names
# "conv_fft_valid" and "conv_fft_full".  The accompanying documentation
# states they are not enabled by default and are switched on with
# THEANO_FLAGS=optimizer_including=conv_fft_valid:conv_fft_full.
gpu_optimizer
.
register
(
"conv_fft_valid"
,
local_conv_fft_valid
)
gpu_optimizer
.
register
(
"conv_fft_full"
,
local_conv_fft_full
)
import
theano.tensor.signal.downsample
as
downsample
@register_opt
()
@register_opt
()
@local_optimizer
([
downsample
.
DownsampleFactorMax
])
@local_optimizer
([
downsample
.
DownsampleFactorMax
])
...
...
theano/sandbox/cuda/tests/test_fftconv.py
0 → 100644
浏览文件 @
75550055
import
unittest
import
numpy
import
theano
from
theano.tests
import
unittest_tools
as
utt
# Skip tests if cuda_ndarray is not available.
from
nose.plugins.skip
import
SkipTest
import
theano.sandbox.cuda
as
cuda_ndarray
# Raise SkipTest at import time when the cuda package reports itself as
# unavailable; everything below this point needs theano.sandbox.cuda.
if
cuda_ndarray
.
cuda_available
==
False
:
raise
SkipTest
(
'Optional package cuda disabled'
)
from
theano.sandbox.cuda
import
float32_shared_constructor
as
shared
# Compilation mode used for the FFT functions in this module: the 'gpu'
# optimizations are added on top of FAST_RUN when the configured mode is
# FAST_COMPILE, and on top of the default mode otherwise.
mode_with_gpu = (
    theano.compile.mode.get_mode('FAST_RUN')
    if theano.config.mode == 'FAST_COMPILE'
    else theano.compile.mode.get_default_mode()
).including('gpu')
class TestConv2dFFT(unittest.TestCase):
    """Check conv2d_fft against theano.tensor.nnet.conv.conv2d.

    Two things are covered: calling conv2d_fft directly (``test_valid`` and
    ``test_full``), and having the 'conv_fft_valid' / 'conv_fft_full'
    optimizations insert it into a compiled graph (``test_opt_valid`` and
    ``test_opt_full``).
    """

    @staticmethod
    def _random_shared(shape):
        """Wrap random float32 data of the given shape with ``shared``."""
        values = numpy.random.random(shape).astype('float32')
        return shared(values)

    @staticmethod
    def _count_fft_nodes(fn):
        """Count the CuFFTOp nodes in the compiled graph of ``fn``."""
        fft_op = theano.sandbox.cuda.fftconv.CuFFTOp
        nodes = fn.maker.fgraph.toposort()
        return sum(isinstance(n.op, fft_op) for n in nodes)

    def _check_opt(self, opt_name, **conv_args):
        """Compile conv2d with and without ``opt_name`` and compare results.

        ``conv_args`` are forwarded to theano.tensor.nnet.conv.conv2d.
        """
        inputs = self._random_shared((5, 3, 7, 6))
        filters = self._random_shared((2, 3, 3, 3))

        conv = theano.tensor.nnet.conv.conv2d(inputs, filters, **conv_args)

        mode = mode_with_gpu.including(opt_name)

        # The reference function is compiled first, then the one that
        # includes the optimization under test.
        f_ref = theano.function([], conv)
        f_fft = theano.function([], conv, mode=mode)

        # make sure we inserted the fft trickery
        assert self._count_fft_nodes(f_fft) == 2

        res_ref = f_ref()
        res_fft = f_fft()

        utt.assert_allclose(res_ref, res_fft)

    def run_conv(self, inputs_shape, filters_shape, pad=False, **other_args):
        """Compare conv2d_fft with the reference conv2d on random data.

        ``pad`` is passed to conv2d_fft as ``pad_last_dim``; ``other_args``
        are forwarded to both convolution functions.
        """
        inputs = self._random_shared(inputs_shape)
        filters = self._random_shared(filters_shape)

        conv_ref = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                                  **other_args)
        conv_fft = theano.sandbox.cuda.fftconv.conv2d_fft(
            inputs, filters, pad_last_dim=pad, **other_args)

        f_ref = theano.function([], conv_ref)
        f_fft = theano.function([], conv_fft, mode=mode_with_gpu)

        res_ref = f_ref()
        res_fft = f_fft()

        utt.assert_allclose(res_ref, res_fft)

    def test_valid(self):
        """'valid' border mode, without and with last-dimension padding."""
        filters_shape = (2, 3, 3, 3)
        self.run_conv(inputs_shape=(5, 3, 7, 6),
                      filters_shape=filters_shape,
                      border_mode='valid')
        self.run_conv(inputs_shape=(5, 3, 7, 7),
                      filters_shape=filters_shape,
                      border_mode='valid', pad=True)

    def test_full(self):
        """'full' border mode, without and with last-dimension padding."""
        filters_shape = (2, 3, 3, 3)
        self.run_conv(inputs_shape=(5, 3, 7, 6),
                      filters_shape=filters_shape,
                      border_mode='full')
        self.run_conv(inputs_shape=(5, 3, 7, 7),
                      filters_shape=filters_shape,
                      border_mode='full', pad=True)

    def test_opt_valid(self):
        """'conv_fft_valid' rewrites a conv2d called with default arguments."""
        self._check_opt('conv_fft_valid')

    def test_opt_full(self):
        """'conv_fft_full' rewrites a conv2d built with border_mode='full'."""
        self._check_opt('conv_fft_full', border_mode='full')
theano/tensor/nnet/tests/test_conv.py
浏览文件 @
75550055
...
@@ -12,9 +12,11 @@ from theano.tensor.basic import _allclose, NotScalarConstantError
...
@@ -12,9 +12,11 @@ from theano.tensor.basic import _allclose, NotScalarConstantError
class
TestConv2D
(
utt
.
InferShapeTester
):
class
TestConv2D
(
utt
.
InferShapeTester
):
mode
=
None
dtype
=
'float64'
def
setUp
(
self
):
def
setUp
(
self
):
super
(
TestConv2D
,
self
)
.
setUp
()
super
(
TestConv2D
,
self
)
.
setUp
()
self
.
input
=
T
.
dtensor4
(
'input'
)
self
.
input
=
T
.
dtensor4
(
'input'
)
self
.
input
.
name
=
'default_V'
self
.
input
.
name
=
'default_V'
self
.
filters
=
T
.
dtensor4
(
'filters'
)
self
.
filters
=
T
.
dtensor4
(
'filters'
)
...
@@ -67,11 +69,11 @@ class TestConv2D(utt.InferShapeTester):
...
@@ -67,11 +69,11 @@ class TestConv2D(utt.InferShapeTester):
output
=
sym_conv2d
(
input
,
filters
)
output
=
sym_conv2d
(
input
,
filters
)
output
.
name
=
'conv2d(
%
s,
%
s)'
%
(
input
.
name
,
filters
.
name
)
output
.
name
=
'conv2d(
%
s,
%
s)'
%
(
input
.
name
,
filters
.
name
)
theano_conv
=
theano
.
function
([
input
,
filters
],
output
)
theano_conv
=
theano
.
function
([
input
,
filters
],
output
,
mode
=
self
.
mode
)
# initialize input and compute result
# initialize input and compute result
image_data
=
numpy
.
random
.
random
(
N_image_shape
)
image_data
=
numpy
.
random
.
random
(
N_image_shape
)
.
astype
(
self
.
dtype
)
filter_data
=
numpy
.
random
.
random
(
N_filter_shape
)
filter_data
=
numpy
.
random
.
random
(
N_filter_shape
)
.
astype
(
self
.
dtype
)
try
:
try
:
theano_output
=
theano_conv
(
image_data
,
filter_data
)
theano_output
=
theano_conv
(
image_data
,
filter_data
)
except
ValueError
:
except
ValueError
:
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论