Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
ef0fc56e
提交
ef0fc56e
authored
4月 12, 2016
作者:
Frédéric Bastien
提交者:
Frederic Bastien
4月 13, 2016
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Merge pull request #4344 from mgermain/cudnn5
Added cuDNN v5 support & Added optional dependencies to setup.py Conflicts: .travis.yml doc/install.txt
上级
02453383
显示空白字符变更
内嵌
并排
正在显示
21 个修改的文件
包含
372 行增加
和
183 行删除
+372
-183
.travis.yml
.travis.yml
+1
-0
install.txt
doc/install.txt
+1
-1
requirement-rtd.txt
requirement-rtd.txt
+2
-0
setup.py
setup.py
+5
-0
configdefaults.py
theano/configdefaults.py
+17
-9
dnn.py
theano/sandbox/cuda/dnn.py
+129
-67
dnn_base.c
theano/sandbox/cuda/dnn_base.c
+4
-1
dnn_fwd.c
theano/sandbox/cuda/dnn_fwd.c
+14
-1
dnn_gi.c
theano/sandbox/cuda/dnn_gi.c
+2
-2
dnn_gw.c
theano/sandbox/cuda/dnn_gw.c
+2
-2
test_dnn.py
theano/sandbox/cuda/tests/test_dnn.py
+2
-1
conv_desc.c
theano/sandbox/gpuarray/conv_desc.c
+1
-1
dnn.py
theano/sandbox/gpuarray/dnn.py
+55
-36
dnn_base.c
theano/sandbox/gpuarray/dnn_base.c
+8
-1
dnn_fwd.c
theano/sandbox/gpuarray/dnn_fwd.c
+21
-6
dnn_gi.c
theano/sandbox/gpuarray/dnn_gi.c
+8
-8
dnn_gw.c
theano/sandbox/gpuarray/dnn_gw.c
+8
-8
dnn_pool.c
theano/sandbox/gpuarray/dnn_pool.c
+5
-0
dnn_pool_grad.c
theano/sandbox/gpuarray/dnn_pool_grad.c
+5
-0
test_dnn.py
theano/sandbox/gpuarray/tests/test_dnn.py
+71
-39
unittest_tools.py
theano/tests/unittest_tools.py
+11
-0
没有找到文件。
.travis.yml
浏览文件 @
ef0fc56e
...
@@ -37,6 +37,7 @@ install:
...
@@ -37,6 +37,7 @@ install:
-
source activate pyenv
-
source activate pyenv
-
if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install pydot; fi
-
if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install pydot; fi
-
pip install . --no-deps
-
pip install . --no-deps
-
pip install nose-parameterized==0.5.0
# command to run tests
# command to run tests
env
:
env
:
...
...
doc/install.txt
浏览文件 @
ef0fc56e
...
@@ -49,7 +49,7 @@ instructions below for detailed installation steps):
...
@@ -49,7 +49,7 @@ instructions below for detailed installation steps):
The following libraries and software are optional:
The following libraries and software are optional:
`nose <http://
somethingaboutorange.com/mrl/projects/nose/>`_ >= 1.3
.0
`nose <http://
nose.readthedocs.org/en/latest/>`_ >= 1.3.0 and `nose-parameterized <https://pypi.python.org/pypi/nose-parameterized/>`_ >= 0.5
.0
Recommended, to run Theano's test-suite.
Recommended, to run Theano's test-suite.
`Sphinx <http://sphinx.pocoo.org/>`_ >= 0.5.1, `pygments <http://pygments.org/>`_
`Sphinx <http://sphinx.pocoo.org/>`_ >= 0.5.1, `pygments <http://pygments.org/>`_
...
...
requirement-rtd.txt
浏览文件 @
ef0fc56e
sphinx>=1.3.0
sphinx>=1.3.0
pygments
nose>=1.3.0
nose>=1.3.0
nose-parameterized>=0.5.0
setup.py
浏览文件 @
ef0fc56e
...
@@ -163,6 +163,11 @@ def do_setup():
...
@@ -163,6 +163,11 @@ def do_setup():
packages
=
find_packages
(),
packages
=
find_packages
(),
# 1.7.0 gives too many warnings related to numpy.diagonal.
# 1.7.0 gives too many warnings related to numpy.diagonal.
install_requires
=
[
'numpy>=1.7.1'
,
'scipy>=0.11'
,
'six>=1.9.0'
],
install_requires
=
[
'numpy>=1.7.1'
,
'scipy>=0.11'
,
'six>=1.9.0'
],
# pygments is a dependency for Sphinx code highlight
extras_require
=
{
'test'
:
[
'nose>=1.3.0'
,
'nose-parameterized>=0.5.0'
],
'doc'
:
[
'Sphinx>=0.5.1'
,
'pygments'
]
},
package_data
=
{
package_data
=
{
''
:
[
'*.txt'
,
'*.rst'
,
'*.cu'
,
'*.cuh'
,
'*.c'
,
'*.sh'
,
'*.pkl'
,
''
:
[
'*.txt'
,
'*.rst'
,
'*.cu'
,
'*.cuh'
,
'*.c'
,
'*.sh'
,
'*.pkl'
,
'*.h'
,
'*.cpp'
,
'ChangeLog'
],
'*.h'
,
'*.cpp'
,
'ChangeLog'
],
...
...
theano/configdefaults.py
浏览文件 @
ef0fc56e
...
@@ -286,6 +286,20 @@ def safe_no_dnn_algo_bwd(algo):
...
@@ -286,6 +286,20 @@ def safe_no_dnn_algo_bwd(algo):
'`dnn.conv.algo_bwd_filter` and `dnn.conv.algo_bwd_data` instead.'
)
'`dnn.conv.algo_bwd_filter` and `dnn.conv.algo_bwd_data` instead.'
)
return
True
return
True
# These are the algorithms supported by Theano.
# The tests reference these lists.
SUPPORTED_DNN_CONV_ALGO_FWD
=
(
'small'
,
'none'
,
'large'
,
'fft'
,
'fft_tiling'
,
'winograd'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
)
SUPPORTED_DNN_CONV_ALGO_BWD_DATA
=
(
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
'winograd'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
)
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
=
(
'none'
,
'deterministic'
,
'fft'
,
'small'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
)
AddConfigVar
(
'dnn.conv.algo_bwd'
,
AddConfigVar
(
'dnn.conv.algo_bwd'
,
"This flag is deprecated; use dnn.conv.algo_bwd_data and "
"This flag is deprecated; use dnn.conv.algo_bwd_data and "
"dnn.conv.algo_bwd_filter."
,
"dnn.conv.algo_bwd_filter."
,
...
@@ -295,26 +309,20 @@ AddConfigVar('dnn.conv.algo_bwd',
...
@@ -295,26 +309,20 @@ AddConfigVar('dnn.conv.algo_bwd',
AddConfigVar
(
'dnn.conv.algo_fwd'
,
AddConfigVar
(
'dnn.conv.algo_fwd'
,
"Default implementation to use for CuDNN forward convolution."
,
"Default implementation to use for CuDNN forward convolution."
,
EnumStr
(
'small'
,
'none'
,
'large'
,
'fft'
,
'fft_tiling'
,
EnumStr
(
*
SUPPORTED_DNN_CONV_ALGO_FWD
),
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
),
in_c_key
=
False
)
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.algo_bwd_data'
,
AddConfigVar
(
'dnn.conv.algo_bwd_data'
,
"Default implementation to use for CuDNN backward convolution to "
"Default implementation to use for CuDNN backward convolution to "
"get the gradients of the convolution with regard to the inputs."
,
"get the gradients of the convolution with regard to the inputs."
,
EnumStr
(
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
EnumStr
(
*
SUPPORTED_DNN_CONV_ALGO_BWD_DATA
),
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
),
in_c_key
=
False
)
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.algo_bwd_filter'
,
AddConfigVar
(
'dnn.conv.algo_bwd_filter'
,
"Default implementation to use for CuDNN backward convolution to "
"Default implementation to use for CuDNN backward convolution to "
"get the gradients of the convolution with regard to the "
"get the gradients of the convolution with regard to the "
"filters."
,
"filters."
,
EnumStr
(
'none'
,
'deterministic'
,
'fft'
,
'small'
,
'guess_once'
,
EnumStr
(
*
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
),
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
),
in_c_key
=
False
)
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.precision'
,
AddConfigVar
(
'dnn.conv.precision'
,
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
ef0fc56e
...
@@ -240,7 +240,7 @@ class GpuDnnConvDesc(GpuOp):
...
@@ -240,7 +240,7 @@ class GpuDnnConvDesc(GpuOp):
}
}
}
}
err = cudnnSetConvolutionNdDescriptor
_v3
(
err = cudnnSetConvolutionNdDescriptor(
%(desc)
s,
%(desc)
s,
%(nb_dim)
d,
%(nb_dim)
d,
pad, subsample, upscale,
pad, subsample, upscale,
...
@@ -294,10 +294,8 @@ class GpuDnnConv(DnnBase, COp):
...
@@ -294,10 +294,8 @@ class GpuDnnConv(DnnBase, COp):
The convolution descriptor.
The convolution descriptor.
workmem
workmem
*deprecated*, use parameter algo instead.
*deprecated*, use parameter algo instead.
algo
algo : {'none', 'small', 'large', 'fft', 'fft_tiling', 'guess_once', 'winograd',
['none', 'small', 'large', 'fft', 'fft_tiling', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
'guess_on_shape_change', 'time_once', 'time_on_shape_change']
Default is the value of :attr:`config.dnn.conv.algo_fwd`.
Default is the value of :attr:`config.dnn.conv.algo_fwd`.
"""
"""
...
@@ -344,8 +342,13 @@ class GpuDnnConv(DnnBase, COp):
...
@@ -344,8 +342,13 @@ class GpuDnnConv(DnnBase, COp):
raise
RuntimeError
(
"CuDNN tiled-FFT convolution requires "
raise
RuntimeError
(
"CuDNN tiled-FFT convolution requires "
"CuDNN v4 or more recent"
)
"CuDNN v4 or more recent"
)
if
version
()
<
(
5000
,
5000
):
if
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"CuDNN winograd convolution requires "
"CuDNN v5 or more recent"
)
assert
self
.
algo
in
[
'none'
,
'small'
,
'large'
,
'fft'
,
'fft_tiling'
,
assert
self
.
algo
in
[
'none'
,
'small'
,
'large'
,
'fft'
,
'fft_tiling'
,
'guess_once'
,
'guess_on_shape_change'
,
'
winograd'
,
'
guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
]
'time_once'
,
'time_on_shape_change'
]
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
...
@@ -383,8 +386,11 @@ class GpuDnnConv(DnnBase, COp):
...
@@ -383,8 +386,11 @@ class GpuDnnConv(DnnBase, COp):
# need v3
# need v3
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_FFT'
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_FFT'
elif
self
.
algo
==
'fft_tiling'
:
elif
self
.
algo
==
'fft_tiling'
:
# need v4
# need v4
for conv2d, need v5 for conv3d
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
elif
self
.
algo
==
'winograd'
:
# need v5
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD'
elif
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
]:
elif
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
]:
# The convolution implementation should be chosen according
# The convolution implementation should be chosen according
# to a heuristic
# to a heuristic
...
@@ -472,15 +478,20 @@ class GpuDnnConv3d(GpuDnnConv):
...
@@ -472,15 +478,20 @@ class GpuDnnConv3d(GpuDnnConv):
"""
"""
The forward convolution.
The forward convolution.
:param image:
Parameters
:param kernel:
----------
:param descr: the convolution descriptor
image
:param workmem:
kernel
descr
The convolution descriptor
workmem
*deprecated*, use parameter algo instead.
*deprecated*, use parameter algo instead.
:param algo: ['none', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change']
algo : {'none', 'small', 'fft_tiling', 'winograd', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_fwd`.
Default is the value of :attr:`config.dnn.conv.algo_fwd`.
"""
"""
__props__
=
(
'algo'
,
'inplace'
)
__props__
=
(
'algo'
,
'inplace'
)
__input_name__
=
(
'image'
,
'kernel'
,
'output'
,
__input_name__
=
(
'image'
,
'kernel'
,
'output'
,
'descriptor'
,
'alpha'
,
'beta'
)
'descriptor'
,
'alpha'
,
'beta'
)
...
@@ -492,9 +503,24 @@ class GpuDnnConv3d(GpuDnnConv):
...
@@ -492,9 +503,24 @@ class GpuDnnConv3d(GpuDnnConv):
assert
algo
is
None
assert
algo
is
None
algo
=
workmem
algo
=
workmem
super
(
GpuDnnConv3d
,
self
)
.
__init__
(
inplace
=
inplace
,
algo
=
'none'
)
good_algo
=
[
'none'
,
'small'
,
'fft_tiling'
,
'winograd'
,
assert
self
.
algo
in
[
'none'
,
'guess_once'
,
'guess_on_shape_change'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
]
'time_once'
,
'time_on_shape_change'
]
if
algo
is
None
and
config
.
dnn
.
conv
.
algo_fwd
not
in
good_algo
:
algo
=
'guess_once'
elif
algo
is
not
None
and
algo
not
in
good_algo
:
algo
=
'guess_once'
super
(
GpuDnnConv3d
,
self
)
.
__init__
(
inplace
=
inplace
,
algo
=
algo
)
assert
self
.
algo
in
good_algo
if
version
()
<
(
5000
,
5000
):
if
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"CuDNN 3d tiled-FFT convolution requires "
"CuDNN v5 or more recent"
)
elif
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"CuDNN 3d winograd convolution requires "
"CuDNN v5 or more recent"
)
def
make_node
(
self
,
img
,
kern
,
output
,
desc
,
alpha
=
None
,
beta
=
None
):
def
make_node
(
self
,
img
,
kern
,
output
,
desc
,
alpha
=
None
,
beta
=
None
):
...
@@ -558,7 +584,8 @@ class GpuDnnConvGradW(DnnBase, COp):
...
@@ -558,7 +584,8 @@ class GpuDnnConvGradW(DnnBase, COp):
The convolution descriptor.
The convolution descriptor.
workmem
workmem
*deprecated*, use parameter algo instead.
*deprecated*, use parameter algo instead.
algo : {'none', 'deterministic', 'fft', 'small', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
algo : {'none', 'deterministic', 'fft', 'small', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`.
Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`.
"""
"""
...
@@ -693,15 +720,20 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW):
...
@@ -693,15 +720,20 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW):
"""
"""
The convolution gradient with respect to the weights.
The convolution gradient with respect to the weights.
:param image:
Parameters
:param kernel:
----------
:param descr: the convolution descriptor
image
:param workmem:
kernel
descr
The convolution descriptor
workmem
*deprecated*, use parameter algo instead.
*deprecated*, use parameter algo instead.
:param algo: ['none', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change']
algo : {'none', 'small', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`.
Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`.
"""
"""
__props__
=
(
'algo'
,
'inplace'
,)
__props__
=
(
'algo'
,
'inplace'
,)
__input_name__
=
(
'image'
,
'grad'
,
'output'
,
'descriptor'
,
'alpha'
,
'beta'
)
__input_name__
=
(
'image'
,
'grad'
,
'output'
,
'descriptor'
,
'alpha'
,
'beta'
)
...
@@ -711,11 +743,18 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW):
...
@@ -711,11 +743,18 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW):
"deprecated. Use 'algo' instead."
),
stacklevel
=
3
)
"deprecated. Use 'algo' instead."
),
stacklevel
=
3
)
assert
algo
is
None
assert
algo
is
None
algo
=
workmem
algo
=
workmem
good_algo
=
[
'none'
,
'small'
,
super
(
GpuDnnConv3dGradW
,
self
)
.
__init__
(
inplace
=
inplace
,
'guess_once'
,
'guess_on_shape_change'
,
algo
=
'none'
)
assert
self
.
algo
in
[
'none'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
]
'time_once'
,
'time_on_shape_change'
]
if
version
()
<
(
5000
,
5000
)
and
algo
==
'small'
:
algo
=
'guess_once'
elif
algo
is
None
and
config
.
dnn
.
conv
.
algo_bwd_filter
not
in
good_algo
:
algo
=
'guess_once'
elif
algo
is
not
None
and
algo
not
in
good_algo
:
algo
=
'guess_once'
super
(
GpuDnnConv3dGradW
,
self
)
.
__init__
(
inplace
=
inplace
,
algo
=
algo
)
assert
self
.
algo
in
good_algo
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
img
,
top
,
output
,
desc
,
alpha
,
beta
=
inp
img
,
top
,
output
,
desc
,
alpha
,
beta
=
inp
...
@@ -766,11 +805,8 @@ class GpuDnnConvGradI(DnnBase, COp):
...
@@ -766,11 +805,8 @@ class GpuDnnConvGradI(DnnBase, COp):
The convolution descriptor.
The convolution descriptor.
workmem
workmem
*deprecated*, use parameter algo instead.
*deprecated*, use parameter algo instead.
algo
algo : {'none', 'deterministic', 'fft', 'fft_tiling', 'winograd', 'guess_once',
['none', 'deterministic', 'fft', 'fft_tiling', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
'guess_on_shape_change', 'time_once',
'time_on_shape_change']
Default is the value of :attr:`config.dnn.conv.algo_bwd_data`.
Default is the value of :attr:`config.dnn.conv.algo_bwd_data`.
"""
"""
...
@@ -804,8 +840,13 @@ class GpuDnnConvGradI(DnnBase, COp):
...
@@ -804,8 +840,13 @@ class GpuDnnConvGradI(DnnBase, COp):
raise
RuntimeError
(
"CuDNN's tiled-FFT convolution requires "
raise
RuntimeError
(
"CuDNN's tiled-FFT convolution requires "
"CuDNN v4 or more recent"
)
"CuDNN v4 or more recent"
)
if
version
()
<
(
5000
,
5000
):
if
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"CuDNN's winograd convolution requires "
"CuDNN v5 or more recent"
)
assert
self
.
algo
in
[
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
assert
self
.
algo
in
[
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
'guess_once'
,
'guess_on_shape_change'
,
'
winograd'
,
'
guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
]
'time_once'
,
'time_on_shape_change'
]
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
...
@@ -859,7 +900,11 @@ class GpuDnnConvGradI(DnnBase, COp):
...
@@ -859,7 +900,11 @@ class GpuDnnConvGradI(DnnBase, COp):
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT'
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT'
elif
self
.
algo
==
'fft_tiling'
:
elif
self
.
algo
==
'fft_tiling'
:
# need v4, big workspace, but less than fft
# need v4, big workspace, but less than fft
# need v5, for conv3d.
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING'
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING'
elif
self
.
algo
==
'winograd'
:
# need v5
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD'
elif
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
]:
elif
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
]:
# The convolution implementation should be chosen according
# The convolution implementation should be chosen according
# to a heuristic
# to a heuristic
...
@@ -913,17 +958,20 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
...
@@ -913,17 +958,20 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
"""
"""
The convolution gradient with respect to the inputs.
The convolution gradient with respect to the inputs.
:param image:
Parameters
:param kernel:
----------
:param descr: the convolution descriptor
image
:param workmem:
kernel
descr
The convolution descriptor
workmem
*deprecated*, use parameter algo instead.
*deprecated*, use parameter algo instead.
:param algo: ['none', 'guess_once', 'guess_on_shape_change',
algo : {'none', 'deterministic', 'fft_tiling', 'winograd', 'guess_once',
'time_once', 'time_on_shape_change']
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_data`.
Default is the value of :attr:`config.dnn.conv.algo_bwd_data`.
"""
"""
__props__
=
(
'algo'
,
'inplace'
,)
__props__
=
(
'algo'
,
'inplace'
,)
__input_name__
=
(
'kernel'
,
'grad'
,
'output'
,
'descriptor'
,
'alpha'
,
__input_name__
=
(
'kernel'
,
'grad'
,
'output'
,
'descriptor'
,
'alpha'
,
'beta'
)
'beta'
)
...
@@ -935,10 +983,24 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
...
@@ -935,10 +983,24 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
assert
algo
is
None
assert
algo
is
None
algo
=
workmem
algo
=
workmem
good_algo
=
[
'none'
,
'deterministic'
,
'fft_tiling'
,
'winograd'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
]
if
algo
is
None
and
config
.
dnn
.
conv
.
algo_bwd_data
not
in
good_algo
:
algo
=
'guess_once'
elif
algo
is
not
None
and
algo
not
in
good_algo
:
algo
=
'guess_once'
super
(
GpuDnnConv3dGradI
,
self
)
.
__init__
(
inplace
=
inplace
,
super
(
GpuDnnConv3dGradI
,
self
)
.
__init__
(
inplace
=
inplace
,
algo
=
"none"
)
algo
=
algo
)
assert
self
.
algo
in
[
'none'
,
'guess_once'
,
'guess_on_shape_change'
,
assert
self
.
algo
in
good_algo
'time_once'
,
'time_on_shape_change'
]
if
version
()
<
(
5000
,
5000
):
if
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"CuDNN 3d tiled-FFT convolution requires "
"CuDNN v5 or more recent"
)
elif
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"CuDNN 3d winograd convolution requires "
"CuDNN v5 or more recent"
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
kerns
,
top
,
output
,
desc
,
alpha
,
beta
=
inp
kerns
,
top
,
output
,
desc
,
alpha
,
beta
=
inp
...
@@ -1100,7 +1162,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -1100,7 +1162,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
def
dnn_conv3d
(
img
,
kerns
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
,
1
),
def
dnn_conv3d
(
img
,
kerns
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
,
1
),
conv_mode
=
'conv'
,
direction_hint
=
None
,
workmem
=
None
,
conv_mode
=
'conv'
,
direction_hint
=
None
,
workmem
=
None
,
algo
=
'none'
,
precision
=
None
):
algo
=
None
,
precision
=
None
):
"""
"""
GPU convolution using cuDNN from NVIDIA.
GPU convolution using cuDNN from NVIDIA.
...
@@ -1360,8 +1422,9 @@ class GpuDnnPoolDesc(GpuOp):
...
@@ -1360,8 +1422,9 @@ class GpuDnnPoolDesc(GpuOp):
int win[
%(nd)
d] = {
%(win)
s};
int win[
%(nd)
d] = {
%(win)
s};
int pad[
%(nd)
d] = {
%(pad)
s};
int pad[
%(nd)
d] = {
%(pad)
s};
int str[
%(nd)
d] = {
%(str)
s};
int str[
%(nd)
d] = {
%(str)
s};
err = cudnnSetPoolingNdDescriptor(
err = cudnnSetPoolingNdDescriptor_v4(
%(desc)
s,
%(mode_flag)
s,
%(nd)
d,
%(desc)
s,
%(mode_flag)
s,
CUDNN_PROPAGATE_NAN,
%(nd)
d,
win, pad, str);
win, pad, str);
}
}
if (err != CUDNN_STATUS_SUCCESS) {
if (err != CUDNN_STATUS_SUCCESS) {
...
@@ -1397,6 +1460,7 @@ class GpuDnnPool(DnnBase):
...
@@ -1397,6 +1460,7 @@ class GpuDnnPool(DnnBase):
(padX, padY) padding information.
(padX, padY) padding information.
padX is the size of the left and right borders,
padX is the size of the left and right borders,
padY is the size of the top and bottom borders.
padY is the size of the top and bottom borders.
"""
"""
__props__
=
(
"mode"
,)
__props__
=
(
"mode"
,)
...
@@ -1542,8 +1606,9 @@ for(int i = 0; i < %(nd)d; i++) {
...
@@ -1542,8 +1606,9 @@ for(int i = 0; i < %(nd)d; i++) {
for(int i = 0; i <
%(nd)
d; i++) {
for(int i = 0; i <
%(nd)
d; i++) {
str[i] = *((npy_intp*)PyArray_GETPTR1(
%(str)
s, i));
str[i] = *((npy_intp*)PyArray_GETPTR1(
%(str)
s, i));
}
}
err = cudnnSetPoolingNdDescriptor(
err = cudnnSetPoolingNdDescriptor_v4(
pool
%(name)
s,
%(mode_flag)
s,
%(nd)
d,
pool
%(name)
s,
%(mode_flag)
s,
CUDNN_PROPAGATE_NAN,
%(nd)
d,
win, pad, str);
win, pad, str);
if (err != CUDNN_STATUS_SUCCESS) {
if (err != CUDNN_STATUS_SUCCESS) {
...
@@ -1820,8 +1885,9 @@ for(int i = 0; i < %(nd)d; i++) {
...
@@ -1820,8 +1885,9 @@ for(int i = 0; i < %(nd)d; i++) {
for(int i = 0; i <
%(nd)
d; i++) {
for(int i = 0; i <
%(nd)
d; i++) {
str[i] = *((npy_intp*)PyArray_GETPTR1(
%(str)
s, i));
str[i] = *((npy_intp*)PyArray_GETPTR1(
%(str)
s, i));
}
}
err
%(name)
s = cudnnSetPoolingNdDescriptor(
err
%(name)
s = cudnnSetPoolingNdDescriptor_v4(
pool
%(name)
s,
%(mode_flag)
s,
%(nd)
d,
pool
%(name)
s,
%(mode_flag)
s,
CUDNN_PROPAGATE_NAN,
%(nd)
d,
win, pad, str);
win, pad, str);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
...
@@ -1914,14 +1980,12 @@ class GpuDnnSoftmaxBase(DnnBase):
...
@@ -1914,14 +1980,12 @@ class GpuDnnSoftmaxBase(DnnBase):
----------
----------
tensor_format
tensor_format
Always set this to 'bc01'.
Always set this to 'bc01'.
algo
algo : {'fast', 'accurate', 'log'}
'fast', 'accurate' or 'log' indicating whether, respectively, computations
Indicating whether, respectively, computations should be optimized for
should be optimized for speed, for accuracy, or if CuDNN should rather
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
compute the log-softmax instead.
mode : {'instance', 'channel'}
mode
Indicating whether the softmax should be computed per image across 'c01'
'instance' or 'channel' indicating whether the softmax should
or per spatial location '01' per image across 'c'.
be computed per image across 'c01' or per spatial location '01' per
image across 'c'.
"""
"""
...
@@ -2077,13 +2141,12 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
...
@@ -2077,13 +2141,12 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
----------
----------
tensor_format
tensor_format
Always set to 'bc01'.
Always set to 'bc01'.
algo
algo
: {'fast', 'accurate'}
'fast' or 'accurate' i
ndicating whether computations should be
I
ndicating whether computations should be
optimized for speed or accuracy respectively.
optimized for speed or accuracy respectively.
mode
mode : {'instance', 'channel'}
'instance' or 'channel' indicating whether the softmax should
Indicating whether the softmax should be computed per image across 'c01'
be computed per image across 'c01' or per spatial location '01' per
or per spatial location '01' per image across 'c'.
image across 'c'.
"""
"""
...
@@ -2145,13 +2208,12 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
...
@@ -2145,13 +2208,12 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
----------
----------
tensor_format
tensor_format
Always set to 'bc01'.
Always set to 'bc01'.
algo
algo
: {'fast', 'accurate'}
'fast' or 'accurate' i
ndicating whether computations should be
I
ndicating whether computations should be
optimized for speed or accuracy respectively.
optimized for speed or accuracy respectively.
mode
mode : {'instance', 'channel'}
'instance' or 'channel' indicating whether the softmax should
Indicating whether the softmax should be computed per image across 'c01'
be computed per image across 'c01' or per spatial location '01' per
or per spatial location '01' per image across 'c'.
image across 'c'.
"""
"""
...
...
theano/sandbox/cuda/dnn_base.c
浏览文件 @
ef0fc56e
...
@@ -54,7 +54,10 @@ c_set_filterNd(CudaNdarray *var, cudnnFilterDescriptor_t desc) {
...
@@ -54,7 +54,10 @@ c_set_filterNd(CudaNdarray *var, cudnnFilterDescriptor_t desc) {
return
-
1
;
return
-
1
;
}
}
int
dim
=
CudaNdarray_NDIM
(
var
);
int
dim
=
CudaNdarray_NDIM
(
var
);
cudnnStatus_t
err
=
cudnnSetFilterNdDescriptor
(
desc
,
CUDNN_DATA_FLOAT
,
dim
,
cudnnStatus_t
err
=
cudnnSetFilterNdDescriptor_v4
(
desc
,
CUDNN_DATA_FLOAT
,
CUDNN_TENSOR_NCHW
,
dim
,
CudaNdarray_HOST_DIMS
(
var
));
CudaNdarray_HOST_DIMS
(
var
));
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
...
theano/sandbox/cuda/dnn_fwd.c
浏览文件 @
ef0fc56e
...
@@ -179,7 +179,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -179,7 +179,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
int
upscale
[
2
];
int
upscale
[
2
];
cudnnConvolutionMode_t
mode
;
cudnnConvolutionMode_t
mode
;
cudnnDataType_t
data_type
;
cudnnDataType_t
data_type
;
err
=
cudnnGetConvolutionNdDescriptor
_v3
(
desc
,
2
,
&
nd
,
pad
,
stride
,
err
=
cudnnGetConvolutionNdDescriptor
(
desc
,
2
,
&
nd
,
pad
,
stride
,
upscale
,
&
mode
,
&
data_type
);
upscale
,
&
mode
,
&
data_type
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -224,6 +224,19 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -224,6 +224,19 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
APPLY_SPECIFIC
(
output
),
APPLY_SPECIFIC
(
output
),
chosen_algo
,
chosen_algo
,
&
worksize
);
&
worksize
);
if
(
err
==
CUDNN_STATUS_NOT_SUPPORTED
)
{
// Fallback to none algo if not supported
// TODO: Print a warning
chosen_algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
;
err
=
cudnnGetConvolutionForwardWorkspaceSize
(
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
desc
,
APPLY_SPECIFIC
(
output
),
chosen_algo
,
&
worksize
);
}
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv: error getting worksize: %s"
,
"GpuDnnConv: error getting worksize: %s"
,
...
...
theano/sandbox/cuda/dnn_gi.c
浏览文件 @
ef0fc56e
...
@@ -178,7 +178,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
...
@@ -178,7 +178,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
int
upscale
[
2
];
int
upscale
[
2
];
cudnnConvolutionMode_t
mode
;
cudnnConvolutionMode_t
mode
;
cudnnDataType_t
data_type
;
cudnnDataType_t
data_type
;
err
=
cudnnGetConvolutionNdDescriptor
_v3
(
desc
,
2
,
&
nd
,
pad
,
stride
,
err
=
cudnnGetConvolutionNdDescriptor
(
desc
,
2
,
&
nd
,
pad
,
stride
,
upscale
,
&
mode
,
&
data_type
);
upscale
,
&
mode
,
&
data_type
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -237,7 +237,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
...
@@ -237,7 +237,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return
1
;
return
1
;
// Perform the convolution
// Perform the convolution
err
=
cudnnConvolutionBackwardData
_v3
(
err
=
cudnnConvolutionBackwardData
(
_handle
,
_handle
,
(
void
*
)
&
alpha
,
(
void
*
)
&
alpha
,
APPLY_SPECIFIC
(
kerns
),
CudaNdarray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
kerns
),
CudaNdarray_DEV_DATA
(
kerns
),
...
...
theano/sandbox/cuda/dnn_gw.c
浏览文件 @
ef0fc56e
...
@@ -173,7 +173,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
...
@@ -173,7 +173,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
int
upscale
[
2
];
int
upscale
[
2
];
cudnnConvolutionMode_t
mode
;
cudnnConvolutionMode_t
mode
;
cudnnDataType_t
data_type
;
cudnnDataType_t
data_type
;
err
=
cudnnGetConvolutionNdDescriptor
_v3
(
desc
,
2
,
&
nd
,
pad
,
stride
,
err
=
cudnnGetConvolutionNdDescriptor
(
desc
,
2
,
&
nd
,
pad
,
stride
,
upscale
,
&
mode
,
&
data_type
);
upscale
,
&
mode
,
&
data_type
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -221,7 +221,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
...
@@ -221,7 +221,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return
1
;
return
1
;
// Perform the convolution
// Perform the convolution
err
=
cudnnConvolutionBackwardFilter
_v3
(
err
=
cudnnConvolutionBackwardFilter
(
_handle
,
_handle
,
(
void
*
)
&
alpha
,
(
void
*
)
&
alpha
,
APPLY_SPECIFIC
(
input
),
CudaNdarray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
input
),
CudaNdarray_DEV_DATA
(
input
),
...
...
theano/sandbox/cuda/tests/test_dnn.py
浏览文件 @
ef0fc56e
...
@@ -392,8 +392,9 @@ def test_pooling_with_tensor_vars():
...
@@ -392,8 +392,9 @@ def test_pooling_with_tensor_vars():
def
test_old_pool_interface
():
def
test_old_pool_interface
():
if
not
cuda
.
dnn
.
dnn_available
():
if
not
cuda
.
dnn
.
dnn_available
()
or
cuda
.
dnn
.
version
()
>
(
5000
,
5000
)
:
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
testfile_dir
=
os
.
path
.
dirname
(
os
.
path
.
realpath
(
__file__
))
testfile_dir
=
os
.
path
.
dirname
(
os
.
path
.
realpath
(
__file__
))
fname
=
'old_pool_interface.pkl'
fname
=
'old_pool_interface.pkl'
with
open
(
os
.
path
.
join
(
testfile_dir
,
fname
),
'rb'
)
as
fp
:
with
open
(
os
.
path
.
join
(
testfile_dir
,
fname
),
'rb'
)
as
fp
:
...
...
theano/sandbox/gpuarray/conv_desc.c
浏览文件 @
ef0fc56e
...
@@ -35,7 +35,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
...
@@ -35,7 +35,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
return
-
1
;
return
-
1
;
}
}
err
=
cudnnSetConvolutionNdDescriptor
_v3
(
*
desc
,
NB_DIMS
,
pad
,
strides
,
err
=
cudnnSetConvolutionNdDescriptor
(
*
desc
,
NB_DIMS
,
pad
,
strides
,
upscale
,
CONV_MODE
,
PRECISION
);
upscale
,
CONV_MODE
,
PRECISION
);
return
0
;
return
0
;
}
}
theano/sandbox/gpuarray/dnn.py
浏览文件 @
ef0fc56e
...
@@ -33,6 +33,8 @@ from .nnet import GpuSoftmax
...
@@ -33,6 +33,8 @@ from .nnet import GpuSoftmax
from
.opt
import
gpu_seqopt
,
register_opt
,
conv_groupopt
,
op_lifter
from
.opt
import
gpu_seqopt
,
register_opt
,
conv_groupopt
,
op_lifter
from
.opt_util
import
alpha_merge
,
output_merge
,
inplace_allocempty
from
.opt_util
import
alpha_merge
,
output_merge
,
inplace_allocempty
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
def
raise_no_cudnn
(
msg
=
"CuDNN is required for convolution and pooling"
):
def
raise_no_cudnn
(
msg
=
"CuDNN is required for convolution and pooling"
):
raise
RuntimeError
(
msg
)
raise
RuntimeError
(
msg
)
...
@@ -232,6 +234,7 @@ def version(raises=True):
...
@@ -232,6 +234,7 @@ def version(raises=True):
:raises: If True, raise an exception if CuDNN is not present or badly installed.
:raises: If True, raise an exception if CuDNN is not present or badly installed.
Otherwise, return -1.
Otherwise, return -1.
"""
"""
if
not
dnn_present
():
if
not
dnn_present
():
if
raises
:
if
raises
:
...
@@ -397,9 +400,9 @@ class GpuDnnConv(DnnBase):
...
@@ -397,9 +400,9 @@ class GpuDnnConv(DnnBase):
----------
----------
image
image
kernel
kernel
descr
descr
:
The convolution descriptor.
The convolution descriptor.
algo : {'small', 'none', 'large', 'fft', 'fft_tiling', 'guess_once',
algo : {'small', 'none', 'large', 'fft', 'fft_tiling', '
winograd', '
guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_fwd`.
Default is the value of :attr:`config.dnn.conv.algo_fwd`.
...
@@ -435,8 +438,12 @@ class GpuDnnConv(DnnBase):
...
@@ -435,8 +438,12 @@ class GpuDnnConv(DnnBase):
raise
RuntimeError
(
"CuDNN tiled-FFT convolution requires "
raise
RuntimeError
(
"CuDNN tiled-FFT convolution requires "
"CuDNN v4 or more recent"
)
"CuDNN v4 or more recent"
)
if
version
()
<
5000
and
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"CuDNN winograd convolution requires "
"CuDNN v5 or more recent"
)
assert
self
.
algo
in
[
'none'
,
'small'
,
'large'
,
'fft'
,
'fft_tiling'
,
assert
self
.
algo
in
[
'none'
,
'small'
,
'large'
,
'fft'
,
'fft_tiling'
,
'guess_once'
,
'guess_on_shape_change'
,
'
winograd'
,
'
guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
]
'time_once'
,
'time_on_shape_change'
]
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
...
@@ -468,6 +475,9 @@ class GpuDnnConv(DnnBase):
...
@@ -468,6 +475,9 @@ class GpuDnnConv(DnnBase):
elif
self
.
algo
==
'fft_tiling'
:
elif
self
.
algo
==
'fft_tiling'
:
# need v4
# need v4
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
elif
self
.
algo
==
'winograd'
:
# need v5
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD'
defs
.
append
((
'CONV_ALGO'
,
alg
))
defs
.
append
((
'CONV_ALGO'
,
alg
))
if
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
,
if
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
,
...
@@ -565,8 +575,11 @@ class GpuDnnConvGradW(DnnBase):
...
@@ -565,8 +575,11 @@ class GpuDnnConvGradW(DnnBase):
----------
----------
image
image
kernel
kernel
descr
descr
:
The convolution descriptor.
The convolution descriptor.
algo : {'none', 'deterministic', 'fft', 'small', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`.
"""
"""
...
@@ -582,9 +595,7 @@ class GpuDnnConvGradW(DnnBase):
...
@@ -582,9 +595,7 @@ class GpuDnnConvGradW(DnnBase):
algo
=
config
.
dnn
.
conv
.
algo_bwd_filter
algo
=
config
.
dnn
.
conv
.
algo_bwd_filter
self
.
algo
=
algo
self
.
algo
=
algo
assert
self
.
algo
in
[
'none'
,
'deterministic'
,
'fft'
,
'small'
,
assert
self
.
algo
in
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
]
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
self
.
__dict__
.
update
(
d
)
...
@@ -688,6 +699,9 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -688,6 +699,9 @@ class GpuDnnConvGradI(DnnBase):
kernel
kernel
descr
descr
The convolution descriptor.
The convolution descriptor.
algo : {'none', 'deterministic', 'fft', 'fft_tiling', 'winograd', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_data`.
"""
"""
...
@@ -708,9 +722,12 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -708,9 +722,12 @@ class GpuDnnConvGradI(DnnBase):
if
version
()
<
4000
and
self
.
algo
==
'fft_tiling'
:
if
version
()
<
4000
and
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"CuDNN's tiled-FFT convolution requires CuDNN "
raise
RuntimeError
(
"CuDNN's tiled-FFT convolution requires CuDNN "
"v4 or more recent"
)
"v4 or more recent"
)
if
version
()
<
5000
and
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"CuDNN's winograd convolution requires CuDNN "
"v5 or more recent"
)
assert
self
.
algo
in
[
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
assert
self
.
algo
in
[
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
'guess_once'
,
'guess_on_shape_change'
,
'
winograd'
,
'
guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
]
'time_once'
,
'time_on_shape_change'
]
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
...
@@ -749,13 +766,16 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -749,13 +766,16 @@ class GpuDnnConvGradI(DnnBase):
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
if
self
.
algo
==
'none'
:
if
self
.
algo
==
'none'
:
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
if
self
.
algo
==
'deterministic'
:
el
if
self
.
algo
==
'deterministic'
:
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_1'
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_1'
if
self
.
algo
==
'fft'
:
el
if
self
.
algo
==
'fft'
:
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT'
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT'
if
self
.
algo
==
'fft_tiling'
:
el
if
self
.
algo
==
'fft_tiling'
:
# big workspace but less than fft
# big workspace but less than fft
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING'
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING'
elif
self
.
algo
==
'winograd'
:
# need v5
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD'
if
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
,
if
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
]:
'time_once'
,
'time_on_shape_change'
]:
...
@@ -1047,9 +1067,13 @@ class GpuDnnPoolDesc(Op):
...
@@ -1047,9 +1067,13 @@ class GpuDnnPoolDesc(Op):
static const int win[
%(nd)
d] = {
%(win)
s};
static const int win[
%(nd)
d] = {
%(win)
s};
static const int pad[
%(nd)
d] = {
%(pad)
s};
static const int pad[
%(nd)
d] = {
%(pad)
s};
static const int str[
%(nd)
d] = {
%(str)
s};
static const int str[
%(nd)
d] = {
%(str)
s};
err = cudnnSetPoolingNdDescriptor(
%(desc)
s,
%(mode_flag)
s,
%(nd)
d,
#if CUDNN_VERSION >= 5000
win, pad, str);
err = cudnnSetPoolingNdDescriptor(
%(desc)
s,
%(mode_flag)
s, CUDNN_PROPAGATE_NAN,
%(nd)
d, win, pad, str);
#else
err = cudnnSetPoolingNdDescriptor(
%(desc)
s,
%(mode_flag)
s,
%(nd)
d, win, pad, str);
#endif
if (err != CUDNN_STATUS_SUCCESS) {
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor:
%%
s",
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor:
%%
s",
cudnnGetErrorString(err));
cudnnGetErrorString(err));
...
@@ -1062,7 +1086,7 @@ class GpuDnnPoolDesc(Op):
...
@@ -1062,7 +1086,7 @@ class GpuDnnPoolDesc(Op):
str
=
', '
.
join
(
map
(
str
,
self
.
stride
)))
str
=
', '
.
join
(
map
(
str
,
self
.
stride
)))
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
3
,
version
())
return
(
4
,
version
())
class
GpuDnnPool
(
DnnBase
):
class
GpuDnnPool
(
DnnBase
):
...
@@ -1070,18 +1094,17 @@ class GpuDnnPool(DnnBase):
...
@@ -1070,18 +1094,17 @@ class GpuDnnPool(DnnBase):
"""
"""
Parameters
Parameters
----------
----------
img
img
: tensor
The image 4d or 5d tensor.
The image 4d or 5d tensor.
Parameters
ws : tensor
----------
ws : tensor variable
Window size.
Window size.
stride : tensor
variable
stride : tensor
(dx, dy) or (dx, dy, dz).
(dx, dy) or (dx, dy, dz).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
The old deprecated name 'average' corresponds to 'average_inc_pad'.
The old deprecated name 'average' corresponds to 'average_inc_pad'.
pad : tensor
pad : tensor
(padX, padY) or (padX, padY, padZ)
(padX, padY) or (padX, padY, padZ)
"""
"""
__props__
=
(
'mode'
,)
__props__
=
(
'mode'
,)
...
@@ -1255,14 +1278,12 @@ class GpuDnnSoftmaxBase(DnnBase):
...
@@ -1255,14 +1278,12 @@ class GpuDnnSoftmaxBase(DnnBase):
Parameters
Parameters
----------
----------
algo
algo : {'fast', 'accurate', 'log'}
'fast', 'accurate' or 'log' indicating whether, respectively,
Indicating whether, respectively, computations should be optimized for
computations should be optimized for speed, for accuracy, or if CuDNN
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
mode
Indicating whether the softmax should be computed per image across 'c01'
'instance' or 'channel' indicating whether the softmax should be
or per spatial location '01' per image across 'c'.
computed per image across 'c01' or per spatial location '01' per
image across 'c'.
"""
"""
...
@@ -1306,14 +1327,12 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
...
@@ -1306,14 +1327,12 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
"""
"""
Op for the cuDNN Softmax.
Op for the cuDNN Softmax.
algo
algo : {'fast', 'accurate', 'log'}
'fast', 'accurate' or 'log' indicating whether, respectively,
Indicating whether, respectively, computations should be optimized for
computations should be optimized for speed, for accuracy, or if CuDNN
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
mode
Indicating whether the softmax should be computed per image across 'c01'
'instance' or 'channel' indicating whether the softmax should be
or per spatial location '01' per image across 'c'.
computed per image across 'c01' or per spatial location '01' per
image across 'c'.
"""
"""
direction
=
"forward"
direction
=
"forward"
...
...
theano/sandbox/gpuarray/dnn_base.c
浏览文件 @
ef0fc56e
...
@@ -51,6 +51,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
...
@@ -51,6 +51,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
static
int
static
int
c_set_filter
(
PyGpuArrayObject
*
var
,
cudnnFilterDescriptor_t
desc
)
{
c_set_filter
(
PyGpuArrayObject
*
var
,
cudnnFilterDescriptor_t
desc
)
{
cudnnDataType_t
dt
;
cudnnDataType_t
dt
;
cudnnStatus_t
err
;
if
(
!
GpuArray_IS_C_CONTIGUOUS
(
&
var
->
ga
))
{
if
(
!
GpuArray_IS_C_CONTIGUOUS
(
&
var
->
ga
))
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"Only contiguous filters (kernels) are supported."
);
"Only contiguous filters (kernels) are supported."
);
...
@@ -86,7 +88,12 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
...
@@ -86,7 +88,12 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
dims
[
i
]
=
PyGpuArray_DIM
(
var
,
i
);
dims
[
i
]
=
PyGpuArray_DIM
(
var
,
i
);
}
}
cudnnStatus_t
err
=
cudnnSetFilterNdDescriptor
(
desc
,
dt
,
nd
,
dims
);
#if CUDNN_VERSION >= 5000
err
=
cudnnSetFilterNdDescriptor
(
desc
,
dt
,
CUDNN_TENSOR_NCHW
,
nd
,
dims
);
#else
err
=
cudnnSetFilterNdDescriptor
(
desc
,
dt
,
nd
,
dims
);
#endif
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"Could not set filter descriptor: %s."
,
"Could not set filter descriptor: %s."
,
...
...
theano/sandbox/gpuarray/dnn_fwd.c
浏览文件 @
ef0fc56e
...
@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
}
}
algo
=
choice
.
algo
;
algo
=
choice
.
algo
;
#else
#else
size_t
free
=
0
,
total
=
0
;
size_t
free
;
cudaError_t
err2
=
cudaMemGetInfo
(
&
free
,
&
total
);
int
err2
=
c
->
ops
->
property
(
c
->
ctx
,
NULL
,
NULL
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
if
(
err2
!=
cudaSuccess
)
{
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"memory information on the GPU: %s
\n
"
,
"memory information on the GPU"
);
cudaGetErrorString
(
err2
));
cuda_exit
(
c
->
ctx
);
cuda_exit
(
c
->
ctx
);
return
1
;
return
1
;
}
}
...
@@ -154,7 +154,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -154,7 +154,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
int
upscale
[
2
];
int
upscale
[
2
];
cudnnConvolutionMode_t
mode
;
cudnnConvolutionMode_t
mode
;
cudnnDataType_t
data_type
;
cudnnDataType_t
data_type
;
err
=
cudnnGetConvolutionNdDescriptor
_v3
(
desc
,
2
,
&
nd
,
pad
,
stride
,
err
=
cudnnGetConvolutionNdDescriptor
(
desc
,
2
,
&
nd
,
pad
,
stride
,
upscale
,
&
mode
,
&
data_type
);
upscale
,
&
mode
,
&
data_type
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -193,6 +193,21 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -193,6 +193,21 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
APPLY_SPECIFIC
(
output
),
APPLY_SPECIFIC
(
output
),
algo
,
algo
,
&
worksize
);
&
worksize
);
if
(
err
==
CUDNN_STATUS_NOT_SUPPORTED
)
{
// Fallback to none algo if not supported
// TODO: Print a warning
algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
;
err
=
cudnnGetConvolutionForwardWorkspaceSize
(
APPLY_SPECIFIC
(
_handle
),
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
desc
,
APPLY_SPECIFIC
(
output
),
algo
,
&
worksize
);
}
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"error getting worksize: %s"
,
"error getting worksize: %s"
,
...
...
theano/sandbox/gpuarray/dnn_gi.c
浏览文件 @
ef0fc56e
...
@@ -91,12 +91,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -91,12 +91,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
algo
=
choice
.
algo
;
algo
=
choice
.
algo
;
#else
#else
size_t
free
=
0
,
total
=
0
;
size_t
free
;
cudaError_t
err2
=
cudaMemGetInfo
(
&
free
,
&
total
);
int
err2
=
c
->
ops
->
property
(
c
->
ctx
,
NULL
,
NULL
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
if
(
err2
!=
cudaSuccess
){
cudaGetLastError
();
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the
memory
"
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"
information on the GPU: %s
\n
"
,
cudaGetErrorString
(
err2
)
);
"
memory information on the GPU"
);
cuda_exit
(
c
->
ctx
);
cuda_exit
(
c
->
ctx
);
return
1
;
return
1
;
}
}
...
@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
int
upscale
[
2
];
int
upscale
[
2
];
cudnnConvolutionMode_t
mode
;
cudnnConvolutionMode_t
mode
;
cudnnDataType_t
data_type
;
cudnnDataType_t
data_type
;
err
=
cudnnGetConvolutionNdDescriptor
_v3
(
desc
,
2
,
&
nd
,
pad
,
stride
,
err
=
cudnnGetConvolutionNdDescriptor
(
desc
,
2
,
&
nd
,
pad
,
stride
,
upscale
,
&
mode
,
&
data_type
);
upscale
,
&
mode
,
&
data_type
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -203,7 +203,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -203,7 +203,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
((
*
input
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
input
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnConvolutionBackwardData
_v3
(
err
=
cudnnConvolutionBackwardData
(
APPLY_SPECIFIC
(
_handle
),
APPLY_SPECIFIC
(
_handle
),
alpha_p
,
alpha_p
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
...
...
theano/sandbox/gpuarray/dnn_gw.c
浏览文件 @
ef0fc56e
...
@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
algo
=
choice
.
algo
;
algo
=
choice
.
algo
;
#else
#else
size_t
free
=
0
,
total
=
0
;
size_t
free
;
cudaError_t
err2
=
cudaMemGetInfo
(
&
free
,
&
total
);
int
err2
=
c
->
ops
->
property
(
c
->
ctx
,
NULL
,
NULL
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
if
(
err2
!=
cudaSuccess
){
cudaGetLastError
();
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the
memory
"
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"
information on the GPU: %s
\n
"
,
cudaGetErrorString
(
err2
)
);
"
memory information on the GPU"
);
cuda_exit
(
c
->
ctx
);
cuda_exit
(
c
->
ctx
);
return
1
;
return
1
;
}
}
...
@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
int
upscale
[
2
];
int
upscale
[
2
];
cudnnConvolutionMode_t
mode
;
cudnnConvolutionMode_t
mode
;
cudnnDataType_t
data_type
;
cudnnDataType_t
data_type
;
err
=
cudnnGetConvolutionNdDescriptor
_v3
(
desc
,
2
,
&
nd
,
pad
,
stride
,
err
=
cudnnGetConvolutionNdDescriptor
(
desc
,
2
,
&
nd
,
pad
,
stride
,
upscale
,
&
mode
,
&
data_type
);
upscale
,
&
mode
,
&
data_type
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -190,7 +190,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -190,7 +190,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
((
*
kerns
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
kerns
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnConvolutionBackwardFilter
_v3
(
err
=
cudnnConvolutionBackwardFilter
(
APPLY_SPECIFIC
(
_handle
),
APPLY_SPECIFIC
(
_handle
),
alpha_p
,
alpha_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
...
...
theano/sandbox/gpuarray/dnn_pool.c
浏览文件 @
ef0fc56e
...
@@ -69,7 +69,12 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
...
@@ -69,7 +69,12 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
for
(
int
i
=
0
;
i
<
ndims
;
i
++
)
{
for
(
int
i
=
0
;
i
<
ndims
;
i
++
)
{
s
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
stride
,
i
));
s
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
stride
,
i
));
}
}
#if CUDNN_VERSION >= 5000
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
CUDNN_PROPAGATE_NAN
,
ndims
,
w
,
p
,
s
);
#else
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
ndims
,
w
,
p
,
s
);
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
ndims
,
w
,
p
,
s
);
#endif
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"could not set op descriptor %s"
,
cudnnGetErrorString
(
err
));
PyErr_Format
(
PyExc_RuntimeError
,
"could not set op descriptor %s"
,
cudnnGetErrorString
(
err
));
...
...
theano/sandbox/gpuarray/dnn_pool_grad.c
浏览文件 @
ef0fc56e
...
@@ -109,7 +109,12 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
...
@@ -109,7 +109,12 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
for
(
int
i
=
0
;
i
<
ndims
;
i
++
)
{
for
(
int
i
=
0
;
i
<
ndims
;
i
++
)
{
s
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
stride
,
i
));
s
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
stride
,
i
));
}
}
#if CUDNN_VERSION >= 5000
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
CUDNN_PROPAGATE_NAN
,
ndims
,
w
,
p
,
s
);
#else
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
ndims
,
w
,
p
,
s
);
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
ndims
,
w
,
p
,
s
);
#endif
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"could not set op descriptor %s"
,
cudnnGetErrorString
(
err
));
PyErr_Format
(
PyExc_RuntimeError
,
"could not set op descriptor %s"
,
cudnnGetErrorString
(
err
));
...
...
theano/sandbox/gpuarray/tests/test_dnn.py
浏览文件 @
ef0fc56e
import
logging
import
logging
from
nose.plugins.skip
import
SkipTest
from
nose.plugins.skip
import
SkipTest
from
nose_parameterized
import
parameterized
import
numpy
import
numpy
from
itertools
import
product
from
itertools
import
product
,
chain
import
theano
import
theano
from
six
import
StringIO
from
six
import
StringIO
...
@@ -18,6 +19,8 @@ from ..basic_ops import GpuAllocEmpty
...
@@ -18,6 +19,8 @@ from ..basic_ops import GpuAllocEmpty
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
test_ctx_name
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
test_ctx_name
from
.
import
test_nnet
from
.
import
test_nnet
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_FWD
def
test_dnn_conv_desc_merge
():
def
test_dnn_conv_desc_merge
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
...
@@ -393,6 +396,9 @@ def test_dnn_tag():
...
@@ -393,6 +396,9 @@ def test_dnn_tag():
class
TestDnnInferShapes
(
utt
.
InferShapeTester
):
class
TestDnnInferShapes
(
utt
.
InferShapeTester
):
border_modes
=
[
'valid'
,
'full'
,
'half'
]
conv_modes
=
[
'conv'
,
'cross'
]
def
setUp
(
self
):
def
setUp
(
self
):
super
(
TestDnnInferShapes
,
self
)
.
setUp
()
super
(
TestDnnInferShapes
,
self
)
.
setUp
()
self
.
mode
=
mode_with_gpu
self
.
mode
=
mode_with_gpu
...
@@ -427,37 +433,25 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -427,37 +433,25 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn
.
GpuDnnSoftmaxGrad
dnn
.
GpuDnnSoftmaxGrad
)
)
def
test_conv
(
self
):
def
_test_conv
(
self
,
img
,
kerns
,
out
,
img_val
,
kern_vals
,
border_mode
,
conv_mode
,
subsamples
,
algo
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
img
=
T
.
ftensor4
(
'img'
)
kerns
=
T
.
ftensor4
(
'kerns'
)
out
=
T
.
ftensor4
(
'out'
)
img_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
7
,
2
,
6
,
4
),
dtype
=
'float32'
)
kern_vals
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
8
,
2
,
4
,
3
),
dtype
=
'float32'
)
for
params
in
product
(
img_val
=
numpy
.
asarray
(
img_val
,
dtype
=
'float32'
)
[
'valid'
,
'full'
,
'half'
],
kern_vals
=
numpy
.
asarray
(
kern_vals
,
dtype
=
'float32'
)
[(
1
,
1
),
(
2
,
2
)],
[
'conv'
,
'cross'
]
for
subsample
in
subsamples
:
):
out_vals
=
numpy
.
zeros
(
out_vals
=
numpy
.
zeros
(
dnn
.
GpuDnnConv
.
get_out_shape
(
img_val
.
shape
,
kern_vals
.
shape
,
dnn
.
GpuDnnConv
.
get_out_shape
(
img_val
.
shape
,
kern_vals
.
shape
,
border_mode
=
params
[
0
]
,
border_mode
=
border_mode
,
subsample
=
params
[
1
]
),
subsample
=
subsample
),
dtype
=
'float32'
)
dtype
=
'float32'
)
desc
=
dnn
.
GpuDnnConvDesc
(
desc
=
dnn
.
GpuDnnConvDesc
(
border_mode
=
params
[
0
]
,
border_mode
=
border_mode
,
subsample
=
params
[
1
]
,
subsample
=
subsample
,
conv_mode
=
params
[
2
]
conv_mode
=
conv_mode
)(
kerns
.
shape
)
)(
kerns
.
shape
)
conv
=
dnn
.
GpuDnnConv
()(
img
,
kerns
,
out
,
desc
)
conv
=
dnn
.
GpuDnnConv
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
self
.
_compile_and_check
(
self
.
_compile_and_check
(
[
img
,
kerns
,
out
],
[
img
,
kerns
,
out
],
[
conv
],
[
conv
],
...
@@ -465,29 +459,56 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -465,29 +459,56 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn
.
GpuDnnConv
dnn
.
GpuDnnConv
)
)
def
test_conv_gradw
(
self
):
@parameterized.expand
(
chain
(
product
([
SUPPORTED_DNN_CONV_ALGO_FWD
[
0
]],
border_modes
,
conv_modes
),
product
(
SUPPORTED_DNN_CONV_ALGO_FWD
[
1
:],
[
border_modes
[
0
]],
[
conv_modes
[
0
]])),
testcase_func_name
=
utt
.
custom_name_func
)
def
test_conv
(
self
,
algo
,
border_mode
,
conv_mode
):
if
algo
==
'winograd'
and
dnn
.
version
()
<
5000
:
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
self
.
_test_conv
(
T
.
ftensor4
(
'img'
),
T
.
ftensor4
(
'kerns'
),
T
.
ftensor4
(
'out'
),
numpy
.
random
.
rand
(
7
,
2
,
8
,
4
),
numpy
.
random
.
rand
(
8
,
2
,
4
,
3
),
border_mode
,
conv_mode
,
[(
1
,
1
),
(
2
,
2
)],
algo
)
@parameterized.expand
(
product
(
border_modes
,
conv_modes
),
utt
.
custom_name_func
)
def
test_conv3d_none
(
self
,
border_mode
,
conv_mode
):
ftensor5
=
T
.
TensorType
(
dtype
=
"float32"
,
broadcastable
=
(
False
,)
*
5
)
self
.
_test_conv
(
ftensor5
(
'img'
),
ftensor5
(
'kerns'
),
ftensor5
(
'out'
),
numpy
.
random
.
rand
(
10
,
2
,
6
,
4
,
11
),
numpy
.
random
.
rand
(
8
,
2
,
4
,
3
,
1
),
border_mode
,
conv_mode
,
[(
1
,
1
,
1
),
(
2
,
2
,
2
)],
'none'
)
def
_test_conv_gradw
(
self
,
img
,
kerns
,
out
,
img_val
,
kern_vals
,
border_mode
,
conv_mode
,
subsample
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
img
=
T
.
ftensor4
(
'img'
)
kerns
=
T
.
ftensor4
(
'kerns'
)
out
=
T
.
ftensor4
(
'out'
)
img_val
=
numpy
.
asarray
(
img_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
2
,
5
,
6
,
8
)
,
img_val
,
dtype
=
'float32'
dtype
=
'float32'
)
)
kern_vals
=
numpy
.
asarray
(
kern_vals
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
2
,
1
,
5
,
6
)
,
kern_vals
,
dtype
=
'float32'
dtype
=
'float32'
)
)
for
params
in
product
(
[
'valid'
,
'full'
,
'half'
],
[(
1
,
1
)],
# strides besides (1, 1)
[
'conv'
,
'cross'
]
):
temp_img
=
img
.
dimshuffle
(
1
,
0
,
2
,
3
)
temp_img
=
img
.
dimshuffle
(
1
,
0
,
2
,
3
)
temp_kerns
=
kerns
temp_kerns
=
kerns
if
params
[
2
]
==
'conv'
:
if
conv_mode
==
'conv'
:
temp_kerns
=
temp_kerns
[:,
:,
::
-
1
,
::
-
1
]
temp_kerns
=
temp_kerns
[:,
:,
::
-
1
,
::
-
1
]
temp_kerns
=
temp_kerns
.
dimshuffle
(
1
,
0
,
2
,
3
)
temp_kerns
=
temp_kerns
.
dimshuffle
(
1
,
0
,
2
,
3
)
shape
=
(
shape
=
(
...
@@ -497,9 +518,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -497,9 +518,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
)
)
out_vals
=
numpy
.
zeros
(
shape
,
dtype
=
'float32'
)
out_vals
=
numpy
.
zeros
(
shape
,
dtype
=
'float32'
)
desc
=
dnn
.
GpuDnnConvDesc
(
desc
=
dnn
.
GpuDnnConvDesc
(
border_mode
=
params
[
0
]
,
border_mode
=
border_mode
,
subsample
=
params
[
1
]
,
subsample
=
subsample
,
conv_mode
=
params
[
2
]
conv_mode
=
conv_mode
)(
out
.
shape
)
)(
out
.
shape
)
conv_grad_w
=
dnn
.
GpuDnnConvGradW
()(
conv_grad_w
=
dnn
.
GpuDnnConvGradW
()(
temp_img
,
temp_img
,
...
@@ -514,6 +535,17 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -514,6 +535,17 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn
.
GpuDnnConvGradW
dnn
.
GpuDnnConvGradW
)
)
@parameterized.expand
(
product
(
border_modes
,
conv_modes
),
utt
.
custom_name_func
)
def
test_conv_gradw
(
self
,
border_mode
,
conv_mode
):
self
.
_test_conv_gradw
(
T
.
ftensor4
(
'img'
),
T
.
ftensor4
(
'kerns'
),
T
.
ftensor4
(
'out'
),
numpy
.
random
.
rand
(
2
,
5
,
6
,
8
),
numpy
.
random
.
rand
(
2
,
1
,
5
,
6
),
border_mode
,
conv_mode
,
(
1
,
1
))
def
test_conv_gradi
(
self
):
def
test_conv_gradi
(
self
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
...
...
theano/tests/unittest_tools.py
浏览文件 @
ef0fc56e
...
@@ -4,6 +4,7 @@ from functools import wraps
...
@@ -4,6 +4,7 @@ from functools import wraps
import
logging
import
logging
import
sys
import
sys
import
unittest
import
unittest
from
nose_parameterized
import
parameterized
from
six
import
integer_types
from
six
import
integer_types
from
six.moves
import
StringIO
from
six.moves
import
StringIO
...
@@ -31,6 +32,13 @@ except ImportError:
...
@@ -31,6 +32,13 @@ except ImportError:
_logger
=
logging
.
getLogger
(
"theano.tests.unittest_tools"
)
_logger
=
logging
.
getLogger
(
"theano.tests.unittest_tools"
)
def
custom_name_func
(
testcase_func
,
param_num
,
param
):
return
"
%
s_
%
s"
%
(
testcase_func
.
__name__
,
parameterized
.
to_safe_name
(
"_"
.
join
(
str
(
x
)
for
x
in
param
.
args
)),
)
def
fetch_seed
(
pseed
=
None
):
def
fetch_seed
(
pseed
=
None
):
"""
"""
Returns the seed to use for running the unit tests.
Returns the seed to use for running the unit tests.
...
@@ -96,6 +104,7 @@ verify_grad.E_grad = T.verify_grad.E_grad
...
@@ -96,6 +104,7 @@ verify_grad.E_grad = T.verify_grad.E_grad
class
TestOptimizationMixin
(
object
):
class
TestOptimizationMixin
(
object
):
def
assertFunctionContains
(
self
,
f
,
op
,
min
=
1
,
max
=
sys
.
maxsize
):
def
assertFunctionContains
(
self
,
f
,
op
,
min
=
1
,
max
=
sys
.
maxsize
):
toposort
=
f
.
maker
.
fgraph
.
toposort
()
toposort
=
f
.
maker
.
fgraph
.
toposort
()
matches
=
[
node
for
node
in
toposort
if
node
.
op
==
op
]
matches
=
[
node
for
node
in
toposort
if
node
.
op
==
op
]
...
@@ -172,6 +181,7 @@ class T_OpContractMixin(object):
...
@@ -172,6 +181,7 @@ class T_OpContractMixin(object):
class
InferShapeTester
(
unittest
.
TestCase
):
class
InferShapeTester
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
seed_rng
()
seed_rng
()
# Take into account any mode that may be defined in a child class
# Take into account any mode that may be defined in a child class
...
@@ -311,6 +321,7 @@ def str_diagnostic(expected, value, rtol, atol):
...
@@ -311,6 +321,7 @@ def str_diagnostic(expected, value, rtol, atol):
class
WrongValue
(
Exception
):
class
WrongValue
(
Exception
):
def
__init__
(
self
,
expected_val
,
val
,
rtol
,
atol
):
def
__init__
(
self
,
expected_val
,
val
,
rtol
,
atol
):
Exception
.
__init__
(
self
)
# to be compatible with python2.4
Exception
.
__init__
(
self
)
# to be compatible with python2.4
self
.
val1
=
expected_val
self
.
val1
=
expected_val
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论