Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
ef0fc56e
提交
ef0fc56e
authored
4月 12, 2016
作者:
Frédéric Bastien
提交者:
Frederic Bastien
4月 13, 2016
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Merge pull request #4344 from mgermain/cudnn5
Added cuDNN v5 support & Added optional dependencies to setup.py Conflicts: .travis.yml doc/install.txt
上级
02453383
全部展开
显示空白字符变更
内嵌
并排
正在显示
21 个修改的文件
包含
243 行增加
和
116 行删除
+243
-116
.travis.yml
.travis.yml
+1
-0
install.txt
doc/install.txt
+1
-1
requirement-rtd.txt
requirement-rtd.txt
+2
-0
setup.py
setup.py
+5
-0
configdefaults.py
theano/configdefaults.py
+17
-9
dnn.py
theano/sandbox/cuda/dnn.py
+0
-0
dnn_base.c
theano/sandbox/cuda/dnn_base.c
+4
-1
dnn_fwd.c
theano/sandbox/cuda/dnn_fwd.c
+14
-1
dnn_gi.c
theano/sandbox/cuda/dnn_gi.c
+2
-2
dnn_gw.c
theano/sandbox/cuda/dnn_gw.c
+2
-2
test_dnn.py
theano/sandbox/cuda/tests/test_dnn.py
+2
-1
conv_desc.c
theano/sandbox/gpuarray/conv_desc.c
+1
-1
dnn.py
theano/sandbox/gpuarray/dnn.py
+55
-36
dnn_base.c
theano/sandbox/gpuarray/dnn_base.c
+8
-1
dnn_fwd.c
theano/sandbox/gpuarray/dnn_fwd.c
+21
-6
dnn_gi.c
theano/sandbox/gpuarray/dnn_gi.c
+8
-8
dnn_gw.c
theano/sandbox/gpuarray/dnn_gw.c
+8
-8
dnn_pool.c
theano/sandbox/gpuarray/dnn_pool.c
+5
-0
dnn_pool_grad.c
theano/sandbox/gpuarray/dnn_pool_grad.c
+5
-0
test_dnn.py
theano/sandbox/gpuarray/tests/test_dnn.py
+71
-39
unittest_tools.py
theano/tests/unittest_tools.py
+11
-0
没有找到文件。
.travis.yml
浏览文件 @
ef0fc56e
...
@@ -37,6 +37,7 @@ install:
...
@@ -37,6 +37,7 @@ install:
-
source activate pyenv
-
source activate pyenv
-
if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install pydot; fi
-
if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install pydot; fi
-
pip install . --no-deps
-
pip install . --no-deps
-
pip install nose-parameterized==0.5.0
# command to run tests
# command to run tests
env
:
env
:
...
...
doc/install.txt
浏览文件 @
ef0fc56e
...
@@ -49,7 +49,7 @@ instructions below for detailed installation steps):
...
@@ -49,7 +49,7 @@ instructions below for detailed installation steps):
The following libraries and software are optional:
The following libraries and software are optional:
`nose <http://
somethingaboutorange.com/mrl/projects/nose/>`_ >= 1.3
.0
`nose <http://
nose.readthedocs.org/en/latest/>`_ >= 1.3.0 and `nose-parameterized <https://pypi.python.org/pypi/nose-parameterized/>`_ >= 0.5
.0
Recommended, to run Theano's test-suite.
Recommended, to run Theano's test-suite.
`Sphinx <http://sphinx.pocoo.org/>`_ >= 0.5.1, `pygments <http://pygments.org/>`_
`Sphinx <http://sphinx.pocoo.org/>`_ >= 0.5.1, `pygments <http://pygments.org/>`_
...
...
requirement-rtd.txt
浏览文件 @
ef0fc56e
sphinx>=1.3.0
sphinx>=1.3.0
pygments
nose>=1.3.0
nose>=1.3.0
nose-parameterized>=0.5.0
setup.py
浏览文件 @
ef0fc56e
...
@@ -163,6 +163,11 @@ def do_setup():
...
@@ -163,6 +163,11 @@ def do_setup():
packages
=
find_packages
(),
packages
=
find_packages
(),
# 1.7.0 give too much warning related to numpy.diagonal.
# 1.7.0 give too much warning related to numpy.diagonal.
install_requires
=
[
'numpy>=1.7.1'
,
'scipy>=0.11'
,
'six>=1.9.0'
],
install_requires
=
[
'numpy>=1.7.1'
,
'scipy>=0.11'
,
'six>=1.9.0'
],
# pygments is a dependency for Sphinx code highlight
extras_require
=
{
'test'
:
[
'nose>=1.3.0'
,
'nose-parameterized>=0.5.0'
],
'doc'
:
[
'Sphinx>=0.5.1'
,
'pygments'
]
},
package_data
=
{
package_data
=
{
''
:
[
'*.txt'
,
'*.rst'
,
'*.cu'
,
'*.cuh'
,
'*.c'
,
'*.sh'
,
'*.pkl'
,
''
:
[
'*.txt'
,
'*.rst'
,
'*.cu'
,
'*.cuh'
,
'*.c'
,
'*.sh'
,
'*.pkl'
,
'*.h'
,
'*.cpp'
,
'ChangeLog'
],
'*.h'
,
'*.cpp'
,
'ChangeLog'
],
...
...
theano/configdefaults.py
浏览文件 @
ef0fc56e
...
@@ -286,6 +286,20 @@ def safe_no_dnn_algo_bwd(algo):
...
@@ -286,6 +286,20 @@ def safe_no_dnn_algo_bwd(algo):
'`dnn.conv.algo_bwd_filter` and `dnn.conv.algo_bwd_data` instead.'
)
'`dnn.conv.algo_bwd_filter` and `dnn.conv.algo_bwd_data` instead.'
)
return
True
return
True
# Those are the supported algorithm by Theano,
# The tests will reference those lists.
SUPPORTED_DNN_CONV_ALGO_FWD
=
(
'small'
,
'none'
,
'large'
,
'fft'
,
'fft_tiling'
,
'winograd'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
)
SUPPORTED_DNN_CONV_ALGO_BWD_DATA
=
(
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
'winograd'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
)
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
=
(
'none'
,
'deterministic'
,
'fft'
,
'small'
,
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
)
AddConfigVar
(
'dnn.conv.algo_bwd'
,
AddConfigVar
(
'dnn.conv.algo_bwd'
,
"This flag is deprecated; use dnn.conv.algo_bwd_data and "
"This flag is deprecated; use dnn.conv.algo_bwd_data and "
"dnn.conv.algo_bwd_filter."
,
"dnn.conv.algo_bwd_filter."
,
...
@@ -295,26 +309,20 @@ AddConfigVar('dnn.conv.algo_bwd',
...
@@ -295,26 +309,20 @@ AddConfigVar('dnn.conv.algo_bwd',
AddConfigVar
(
'dnn.conv.algo_fwd'
,
AddConfigVar
(
'dnn.conv.algo_fwd'
,
"Default implementation to use for CuDNN forward convolution."
,
"Default implementation to use for CuDNN forward convolution."
,
EnumStr
(
'small'
,
'none'
,
'large'
,
'fft'
,
'fft_tiling'
,
EnumStr
(
*
SUPPORTED_DNN_CONV_ALGO_FWD
),
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
),
in_c_key
=
False
)
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.algo_bwd_data'
,
AddConfigVar
(
'dnn.conv.algo_bwd_data'
,
"Default implementation to use for CuDNN backward convolution to "
"Default implementation to use for CuDNN backward convolution to "
"get the gradients of the convolution with regard to the inputs."
,
"get the gradients of the convolution with regard to the inputs."
,
EnumStr
(
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
EnumStr
(
*
SUPPORTED_DNN_CONV_ALGO_BWD_DATA
),
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
),
in_c_key
=
False
)
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.algo_bwd_filter'
,
AddConfigVar
(
'dnn.conv.algo_bwd_filter'
,
"Default implementation to use for CuDNN backward convolution to "
"Default implementation to use for CuDNN backward convolution to "
"get the gradients of the convolution with regard to the "
"get the gradients of the convolution with regard to the "
"filters."
,
"filters."
,
EnumStr
(
'none'
,
'deterministic'
,
'fft'
,
'small'
,
'guess_once'
,
EnumStr
(
*
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
),
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
),
in_c_key
=
False
)
in_c_key
=
False
)
AddConfigVar
(
'dnn.conv.precision'
,
AddConfigVar
(
'dnn.conv.precision'
,
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
ef0fc56e
差异被折叠。
点击展开。
theano/sandbox/cuda/dnn_base.c
浏览文件 @
ef0fc56e
...
@@ -54,7 +54,10 @@ c_set_filterNd(CudaNdarray *var, cudnnFilterDescriptor_t desc) {
...
@@ -54,7 +54,10 @@ c_set_filterNd(CudaNdarray *var, cudnnFilterDescriptor_t desc) {
return
-
1
;
return
-
1
;
}
}
int
dim
=
CudaNdarray_NDIM
(
var
);
int
dim
=
CudaNdarray_NDIM
(
var
);
cudnnStatus_t
err
=
cudnnSetFilterNdDescriptor
(
desc
,
CUDNN_DATA_FLOAT
,
dim
,
cudnnStatus_t
err
=
cudnnSetFilterNdDescriptor_v4
(
desc
,
CUDNN_DATA_FLOAT
,
CUDNN_TENSOR_NCHW
,
dim
,
CudaNdarray_HOST_DIMS
(
var
));
CudaNdarray_HOST_DIMS
(
var
));
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
...
theano/sandbox/cuda/dnn_fwd.c
浏览文件 @
ef0fc56e
...
@@ -179,7 +179,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -179,7 +179,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
int
upscale
[
2
];
int
upscale
[
2
];
cudnnConvolutionMode_t
mode
;
cudnnConvolutionMode_t
mode
;
cudnnDataType_t
data_type
;
cudnnDataType_t
data_type
;
err
=
cudnnGetConvolutionNdDescriptor
_v3
(
desc
,
2
,
&
nd
,
pad
,
stride
,
err
=
cudnnGetConvolutionNdDescriptor
(
desc
,
2
,
&
nd
,
pad
,
stride
,
upscale
,
&
mode
,
&
data_type
);
upscale
,
&
mode
,
&
data_type
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -224,6 +224,19 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -224,6 +224,19 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
APPLY_SPECIFIC
(
output
),
APPLY_SPECIFIC
(
output
),
chosen_algo
,
chosen_algo
,
&
worksize
);
&
worksize
);
if
(
err
==
CUDNN_STATUS_NOT_SUPPORTED
)
{
// Fallback to none algo if not supported
// TODO: Print a warning
chosen_algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
;
err
=
cudnnGetConvolutionForwardWorkspaceSize
(
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
desc
,
APPLY_SPECIFIC
(
output
),
chosen_algo
,
&
worksize
);
}
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv: error getting worksize: %s"
,
"GpuDnnConv: error getting worksize: %s"
,
...
...
theano/sandbox/cuda/dnn_gi.c
浏览文件 @
ef0fc56e
...
@@ -178,7 +178,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
...
@@ -178,7 +178,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
int
upscale
[
2
];
int
upscale
[
2
];
cudnnConvolutionMode_t
mode
;
cudnnConvolutionMode_t
mode
;
cudnnDataType_t
data_type
;
cudnnDataType_t
data_type
;
err
=
cudnnGetConvolutionNdDescriptor
_v3
(
desc
,
2
,
&
nd
,
pad
,
stride
,
err
=
cudnnGetConvolutionNdDescriptor
(
desc
,
2
,
&
nd
,
pad
,
stride
,
upscale
,
&
mode
,
&
data_type
);
upscale
,
&
mode
,
&
data_type
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -237,7 +237,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
...
@@ -237,7 +237,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return
1
;
return
1
;
// Perform the convolution
// Perform the convolution
err
=
cudnnConvolutionBackwardData
_v3
(
err
=
cudnnConvolutionBackwardData
(
_handle
,
_handle
,
(
void
*
)
&
alpha
,
(
void
*
)
&
alpha
,
APPLY_SPECIFIC
(
kerns
),
CudaNdarray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
kerns
),
CudaNdarray_DEV_DATA
(
kerns
),
...
...
theano/sandbox/cuda/dnn_gw.c
浏览文件 @
ef0fc56e
...
@@ -173,7 +173,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
...
@@ -173,7 +173,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
int
upscale
[
2
];
int
upscale
[
2
];
cudnnConvolutionMode_t
mode
;
cudnnConvolutionMode_t
mode
;
cudnnDataType_t
data_type
;
cudnnDataType_t
data_type
;
err
=
cudnnGetConvolutionNdDescriptor
_v3
(
desc
,
2
,
&
nd
,
pad
,
stride
,
err
=
cudnnGetConvolutionNdDescriptor
(
desc
,
2
,
&
nd
,
pad
,
stride
,
upscale
,
&
mode
,
&
data_type
);
upscale
,
&
mode
,
&
data_type
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -221,7 +221,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
...
@@ -221,7 +221,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return
1
;
return
1
;
// Perform the convolution
// Perform the convolution
err
=
cudnnConvolutionBackwardFilter
_v3
(
err
=
cudnnConvolutionBackwardFilter
(
_handle
,
_handle
,
(
void
*
)
&
alpha
,
(
void
*
)
&
alpha
,
APPLY_SPECIFIC
(
input
),
CudaNdarray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
input
),
CudaNdarray_DEV_DATA
(
input
),
...
...
theano/sandbox/cuda/tests/test_dnn.py
浏览文件 @
ef0fc56e
...
@@ -392,8 +392,9 @@ def test_pooling_with_tensor_vars():
...
@@ -392,8 +392,9 @@ def test_pooling_with_tensor_vars():
def
test_old_pool_interface
():
def
test_old_pool_interface
():
if
not
cuda
.
dnn
.
dnn_available
():
if
not
cuda
.
dnn
.
dnn_available
()
or
cuda
.
dnn
.
version
()
>
(
5000
,
5000
)
:
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
testfile_dir
=
os
.
path
.
dirname
(
os
.
path
.
realpath
(
__file__
))
testfile_dir
=
os
.
path
.
dirname
(
os
.
path
.
realpath
(
__file__
))
fname
=
'old_pool_interface.pkl'
fname
=
'old_pool_interface.pkl'
with
open
(
os
.
path
.
join
(
testfile_dir
,
fname
),
'rb'
)
as
fp
:
with
open
(
os
.
path
.
join
(
testfile_dir
,
fname
),
'rb'
)
as
fp
:
...
...
theano/sandbox/gpuarray/conv_desc.c
浏览文件 @
ef0fc56e
...
@@ -35,7 +35,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
...
@@ -35,7 +35,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
return
-
1
;
return
-
1
;
}
}
err
=
cudnnSetConvolutionNdDescriptor
_v3
(
*
desc
,
NB_DIMS
,
pad
,
strides
,
err
=
cudnnSetConvolutionNdDescriptor
(
*
desc
,
NB_DIMS
,
pad
,
strides
,
upscale
,
CONV_MODE
,
PRECISION
);
upscale
,
CONV_MODE
,
PRECISION
);
return
0
;
return
0
;
}
}
theano/sandbox/gpuarray/dnn.py
浏览文件 @
ef0fc56e
...
@@ -33,6 +33,8 @@ from .nnet import GpuSoftmax
...
@@ -33,6 +33,8 @@ from .nnet import GpuSoftmax
from
.opt
import
gpu_seqopt
,
register_opt
,
conv_groupopt
,
op_lifter
from
.opt
import
gpu_seqopt
,
register_opt
,
conv_groupopt
,
op_lifter
from
.opt_util
import
alpha_merge
,
output_merge
,
inplace_allocempty
from
.opt_util
import
alpha_merge
,
output_merge
,
inplace_allocempty
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
def
raise_no_cudnn
(
msg
=
"CuDNN is required for convolution and pooling"
):
def
raise_no_cudnn
(
msg
=
"CuDNN is required for convolution and pooling"
):
raise
RuntimeError
(
msg
)
raise
RuntimeError
(
msg
)
...
@@ -232,6 +234,7 @@ def version(raises=True):
...
@@ -232,6 +234,7 @@ def version(raises=True):
:raises: If True, raise an exception if CuDNN is not present or badly installed.
:raises: If True, raise an exception if CuDNN is not present or badly installed.
Otherwise, return -1.
Otherwise, return -1.
"""
"""
if
not
dnn_present
():
if
not
dnn_present
():
if
raises
:
if
raises
:
...
@@ -397,9 +400,9 @@ class GpuDnnConv(DnnBase):
...
@@ -397,9 +400,9 @@ class GpuDnnConv(DnnBase):
----------
----------
image
image
kernel
kernel
descr
descr
:
The convolution descriptor.
The convolution descriptor.
algo : {'small', 'none', 'large', 'fft', 'fft_tiling', 'guess_once',
algo : {'small', 'none', 'large', 'fft', 'fft_tiling', '
winograd', '
guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_fwd`.
Default is the value of :attr:`config.dnn.conv.algo_fwd`.
...
@@ -435,8 +438,12 @@ class GpuDnnConv(DnnBase):
...
@@ -435,8 +438,12 @@ class GpuDnnConv(DnnBase):
raise
RuntimeError
(
"CuDNN tiled-FFT convolution requires "
raise
RuntimeError
(
"CuDNN tiled-FFT convolution requires "
"CuDNN v4 or more recent"
)
"CuDNN v4 or more recent"
)
if
version
()
<
5000
and
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"CuDNN winograd convolution requires "
"CuDNN v5 or more recent"
)
assert
self
.
algo
in
[
'none'
,
'small'
,
'large'
,
'fft'
,
'fft_tiling'
,
assert
self
.
algo
in
[
'none'
,
'small'
,
'large'
,
'fft'
,
'fft_tiling'
,
'guess_once'
,
'guess_on_shape_change'
,
'
winograd'
,
'
guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
]
'time_once'
,
'time_on_shape_change'
]
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
...
@@ -468,6 +475,9 @@ class GpuDnnConv(DnnBase):
...
@@ -468,6 +475,9 @@ class GpuDnnConv(DnnBase):
elif
self
.
algo
==
'fft_tiling'
:
elif
self
.
algo
==
'fft_tiling'
:
# need v4
# need v4
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
elif
self
.
algo
==
'winograd'
:
# need v5
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD'
defs
.
append
((
'CONV_ALGO'
,
alg
))
defs
.
append
((
'CONV_ALGO'
,
alg
))
if
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
,
if
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
,
...
@@ -565,8 +575,11 @@ class GpuDnnConvGradW(DnnBase):
...
@@ -565,8 +575,11 @@ class GpuDnnConvGradW(DnnBase):
----------
----------
image
image
kernel
kernel
descr
descr
:
The convolution descriptor.
The convolution descriptor.
algo : {'none', 'deterministic', 'fft', 'small', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`.
"""
"""
...
@@ -582,9 +595,7 @@ class GpuDnnConvGradW(DnnBase):
...
@@ -582,9 +595,7 @@ class GpuDnnConvGradW(DnnBase):
algo
=
config
.
dnn
.
conv
.
algo_bwd_filter
algo
=
config
.
dnn
.
conv
.
algo_bwd_filter
self
.
algo
=
algo
self
.
algo
=
algo
assert
self
.
algo
in
[
'none'
,
'deterministic'
,
'fft'
,
'small'
,
assert
self
.
algo
in
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
]
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
self
.
__dict__
.
update
(
d
)
...
@@ -688,6 +699,9 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -688,6 +699,9 @@ class GpuDnnConvGradI(DnnBase):
kernel
kernel
descr
descr
The convolution descriptor.
The convolution descriptor.
algo : {'none', 'deterministic', 'fft', 'fft_tiling', 'winograd', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_data`.
"""
"""
...
@@ -708,9 +722,12 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -708,9 +722,12 @@ class GpuDnnConvGradI(DnnBase):
if
version
()
<
4000
and
self
.
algo
==
'fft_tiling'
:
if
version
()
<
4000
and
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"CuDNN's tiled-FFT convolution requires CuDNN "
raise
RuntimeError
(
"CuDNN's tiled-FFT convolution requires CuDNN "
"v4 or more recent"
)
"v4 or more recent"
)
if
version
()
<
5000
and
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"CuDNN's winograd convolution requires CuDNN "
"v5 or more recent"
)
assert
self
.
algo
in
[
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
assert
self
.
algo
in
[
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
'guess_once'
,
'guess_on_shape_change'
,
'
winograd'
,
'
guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
]
'time_once'
,
'time_on_shape_change'
]
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
...
@@ -749,13 +766,16 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -749,13 +766,16 @@ class GpuDnnConvGradI(DnnBase):
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
if
self
.
algo
==
'none'
:
if
self
.
algo
==
'none'
:
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
if
self
.
algo
==
'deterministic'
:
el
if
self
.
algo
==
'deterministic'
:
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_1'
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_1'
if
self
.
algo
==
'fft'
:
el
if
self
.
algo
==
'fft'
:
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT'
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT'
if
self
.
algo
==
'fft_tiling'
:
el
if
self
.
algo
==
'fft_tiling'
:
# big workspace but less than fft
# big workspace but less than fft
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING'
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING'
elif
self
.
algo
==
'winograd'
:
# need v5
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD'
if
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
,
if
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
,
'time_once'
,
'time_on_shape_change'
]:
'time_once'
,
'time_on_shape_change'
]:
...
@@ -1047,9 +1067,13 @@ class GpuDnnPoolDesc(Op):
...
@@ -1047,9 +1067,13 @@ class GpuDnnPoolDesc(Op):
static const int win[
%(nd)
d] = {
%(win)
s};
static const int win[
%(nd)
d] = {
%(win)
s};
static const int pad[
%(nd)
d] = {
%(pad)
s};
static const int pad[
%(nd)
d] = {
%(pad)
s};
static const int str[
%(nd)
d] = {
%(str)
s};
static const int str[
%(nd)
d] = {
%(str)
s};
err = cudnnSetPoolingNdDescriptor(
%(desc)
s,
%(mode_flag)
s,
%(nd)
d,
#if CUDNN_VERSION >= 5000
win, pad, str);
err = cudnnSetPoolingNdDescriptor(
%(desc)
s,
%(mode_flag)
s, CUDNN_PROPAGATE_NAN,
%(nd)
d, win, pad, str);
#else
err = cudnnSetPoolingNdDescriptor(
%(desc)
s,
%(mode_flag)
s,
%(nd)
d, win, pad, str);
#endif
if (err != CUDNN_STATUS_SUCCESS) {
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor:
%%
s",
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor:
%%
s",
cudnnGetErrorString(err));
cudnnGetErrorString(err));
...
@@ -1062,7 +1086,7 @@ class GpuDnnPoolDesc(Op):
...
@@ -1062,7 +1086,7 @@ class GpuDnnPoolDesc(Op):
str
=
', '
.
join
(
map
(
str
,
self
.
stride
)))
str
=
', '
.
join
(
map
(
str
,
self
.
stride
)))
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
3
,
version
())
return
(
4
,
version
())
class
GpuDnnPool
(
DnnBase
):
class
GpuDnnPool
(
DnnBase
):
...
@@ -1070,18 +1094,17 @@ class GpuDnnPool(DnnBase):
...
@@ -1070,18 +1094,17 @@ class GpuDnnPool(DnnBase):
"""
"""
Parameters
Parameters
----------
----------
img
img
: tensor
The image 4d or 5d tensor.
The image 4d or 5d tensor.
Parameters
ws : tensor
----------
ws : tensor variable
Window size.
Window size.
stride : tensor
variable
stride : tensor
(dx, dy) or (dx, dy, dz).
(dx, dy) or (dx, dy, dz).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
The old deprecated name 'average' corresponds to 'average_inc_pad'.
The old deprecated name 'average' corresponds to 'average_inc_pad'.
pad : tensor
pad : tensor
(padX, padY) or (padX, padY, padZ)
(padX, padY) or (padX, padY, padZ)
"""
"""
__props__
=
(
'mode'
,)
__props__
=
(
'mode'
,)
...
@@ -1255,14 +1278,12 @@ class GpuDnnSoftmaxBase(DnnBase):
...
@@ -1255,14 +1278,12 @@ class GpuDnnSoftmaxBase(DnnBase):
Parameters
Parameters
----------
----------
algo
algo : {'fast', 'accurate', 'log'}
'fast', 'accurate' or 'log' indicating whether, respectively,
Indicating whether, respectively, computations should be optimized for
computations should be optimized for speed, for accuracy, or if CuDNN
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
mode
Indicating whether the softmax should be computed per image across 'c01'
'instance' or 'channel' indicating whether the softmax should be
or per spatial location '01' per image across 'c'.
computed per image across 'c01' or per spatial location '01' per
image across 'c'.
"""
"""
...
@@ -1306,14 +1327,12 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
...
@@ -1306,14 +1327,12 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
"""
"""
Op for the cuDNN Softmax.
Op for the cuDNN Softmax.
algo
algo : {'fast', 'accurate', 'log'}
'fast', 'accurate' or 'log' indicating whether, respectively,
Indicating whether, respectively, computations should be optimized for
computations should be optimized for speed, for accuracy, or if CuDNN
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
mode
Indicating whether the softmax should be computed per image across 'c01'
'instance' or 'channel' indicating whether the softmax should be
or per spatial location '01' per image across 'c'.
computed per image across 'c01' or per spatial location '01' per
image across 'c'.
"""
"""
direction
=
"forward"
direction
=
"forward"
...
...
theano/sandbox/gpuarray/dnn_base.c
浏览文件 @
ef0fc56e
...
@@ -51,6 +51,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
...
@@ -51,6 +51,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
static
int
static
int
c_set_filter
(
PyGpuArrayObject
*
var
,
cudnnFilterDescriptor_t
desc
)
{
c_set_filter
(
PyGpuArrayObject
*
var
,
cudnnFilterDescriptor_t
desc
)
{
cudnnDataType_t
dt
;
cudnnDataType_t
dt
;
cudnnStatus_t
err
;
if
(
!
GpuArray_IS_C_CONTIGUOUS
(
&
var
->
ga
))
{
if
(
!
GpuArray_IS_C_CONTIGUOUS
(
&
var
->
ga
))
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"Only contiguous filters (kernels) are supported."
);
"Only contiguous filters (kernels) are supported."
);
...
@@ -86,7 +88,12 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
...
@@ -86,7 +88,12 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
dims
[
i
]
=
PyGpuArray_DIM
(
var
,
i
);
dims
[
i
]
=
PyGpuArray_DIM
(
var
,
i
);
}
}
cudnnStatus_t
err
=
cudnnSetFilterNdDescriptor
(
desc
,
dt
,
nd
,
dims
);
#if CUDNN_VERSION >= 5000
err
=
cudnnSetFilterNdDescriptor
(
desc
,
dt
,
CUDNN_TENSOR_NCHW
,
nd
,
dims
);
#else
err
=
cudnnSetFilterNdDescriptor
(
desc
,
dt
,
nd
,
dims
);
#endif
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"Could not set filter descriptor: %s."
,
"Could not set filter descriptor: %s."
,
...
...
theano/sandbox/gpuarray/dnn_fwd.c
浏览文件 @
ef0fc56e
...
@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
}
}
algo
=
choice
.
algo
;
algo
=
choice
.
algo
;
#else
#else
size_t
free
=
0
,
total
=
0
;
size_t
free
;
cudaError_t
err2
=
cudaMemGetInfo
(
&
free
,
&
total
);
int
err2
=
c
->
ops
->
property
(
c
->
ctx
,
NULL
,
NULL
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
if
(
err2
!=
cudaSuccess
)
{
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"memory information on the GPU: %s
\n
"
,
"memory information on the GPU"
);
cudaGetErrorString
(
err2
));
cuda_exit
(
c
->
ctx
);
cuda_exit
(
c
->
ctx
);
return
1
;
return
1
;
}
}
...
@@ -154,7 +154,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -154,7 +154,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
int
upscale
[
2
];
int
upscale
[
2
];
cudnnConvolutionMode_t
mode
;
cudnnConvolutionMode_t
mode
;
cudnnDataType_t
data_type
;
cudnnDataType_t
data_type
;
err
=
cudnnGetConvolutionNdDescriptor
_v3
(
desc
,
2
,
&
nd
,
pad
,
stride
,
err
=
cudnnGetConvolutionNdDescriptor
(
desc
,
2
,
&
nd
,
pad
,
stride
,
upscale
,
&
mode
,
&
data_type
);
upscale
,
&
mode
,
&
data_type
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -193,6 +193,21 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -193,6 +193,21 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
APPLY_SPECIFIC
(
output
),
APPLY_SPECIFIC
(
output
),
algo
,
algo
,
&
worksize
);
&
worksize
);
if
(
err
==
CUDNN_STATUS_NOT_SUPPORTED
)
{
// Fallback to none algo if not supported
// TODO: Print a warning
algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
;
err
=
cudnnGetConvolutionForwardWorkspaceSize
(
APPLY_SPECIFIC
(
_handle
),
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
desc
,
APPLY_SPECIFIC
(
output
),
algo
,
&
worksize
);
}
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"error getting worksize: %s"
,
"error getting worksize: %s"
,
...
...
theano/sandbox/gpuarray/dnn_gi.c
浏览文件 @
ef0fc56e
...
@@ -91,12 +91,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -91,12 +91,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
algo
=
choice
.
algo
;
algo
=
choice
.
algo
;
#else
#else
size_t
free
=
0
,
total
=
0
;
size_t
free
;
cudaError_t
err2
=
cudaMemGetInfo
(
&
free
,
&
total
);
int
err2
=
c
->
ops
->
property
(
c
->
ctx
,
NULL
,
NULL
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
if
(
err2
!=
cudaSuccess
){
cudaGetLastError
();
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the
memory
"
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"
information on the GPU: %s
\n
"
,
cudaGetErrorString
(
err2
)
);
"
memory information on the GPU"
);
cuda_exit
(
c
->
ctx
);
cuda_exit
(
c
->
ctx
);
return
1
;
return
1
;
}
}
...
@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
int
upscale
[
2
];
int
upscale
[
2
];
cudnnConvolutionMode_t
mode
;
cudnnConvolutionMode_t
mode
;
cudnnDataType_t
data_type
;
cudnnDataType_t
data_type
;
err
=
cudnnGetConvolutionNdDescriptor
_v3
(
desc
,
2
,
&
nd
,
pad
,
stride
,
err
=
cudnnGetConvolutionNdDescriptor
(
desc
,
2
,
&
nd
,
pad
,
stride
,
upscale
,
&
mode
,
&
data_type
);
upscale
,
&
mode
,
&
data_type
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -203,7 +203,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -203,7 +203,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
((
*
input
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
input
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnConvolutionBackwardData
_v3
(
err
=
cudnnConvolutionBackwardData
(
APPLY_SPECIFIC
(
_handle
),
APPLY_SPECIFIC
(
_handle
),
alpha_p
,
alpha_p
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
...
...
theano/sandbox/gpuarray/dnn_gw.c
浏览文件 @
ef0fc56e
...
@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
algo
=
choice
.
algo
;
algo
=
choice
.
algo
;
#else
#else
size_t
free
=
0
,
total
=
0
;
size_t
free
;
cudaError_t
err2
=
cudaMemGetInfo
(
&
free
,
&
total
);
int
err2
=
c
->
ops
->
property
(
c
->
ctx
,
NULL
,
NULL
,
GA_CTX_PROP_FREE_GMEM
,
&
free
);
if
(
err2
!=
cudaSuccess
){
cudaGetLastError
();
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the
memory
"
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"
information on the GPU: %s
\n
"
,
cudaGetErrorString
(
err2
)
);
"
memory information on the GPU"
);
cuda_exit
(
c
->
ctx
);
cuda_exit
(
c
->
ctx
);
return
1
;
return
1
;
}
}
...
@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
int
upscale
[
2
];
int
upscale
[
2
];
cudnnConvolutionMode_t
mode
;
cudnnConvolutionMode_t
mode
;
cudnnDataType_t
data_type
;
cudnnDataType_t
data_type
;
err
=
cudnnGetConvolutionNdDescriptor
_v3
(
desc
,
2
,
&
nd
,
pad
,
stride
,
err
=
cudnnGetConvolutionNdDescriptor
(
desc
,
2
,
&
nd
,
pad
,
stride
,
upscale
,
&
mode
,
&
data_type
);
upscale
,
&
mode
,
&
data_type
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -190,7 +190,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -190,7 +190,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
((
*
kerns
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
kerns
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnConvolutionBackwardFilter
_v3
(
err
=
cudnnConvolutionBackwardFilter
(
APPLY_SPECIFIC
(
_handle
),
APPLY_SPECIFIC
(
_handle
),
alpha_p
,
alpha_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
...
...
theano/sandbox/gpuarray/dnn_pool.c
浏览文件 @
ef0fc56e
...
@@ -69,7 +69,12 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
...
@@ -69,7 +69,12 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
for
(
int
i
=
0
;
i
<
ndims
;
i
++
)
{
for
(
int
i
=
0
;
i
<
ndims
;
i
++
)
{
s
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
stride
,
i
));
s
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
stride
,
i
));
}
}
#if CUDNN_VERSION >= 5000
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
CUDNN_PROPAGATE_NAN
,
ndims
,
w
,
p
,
s
);
#else
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
ndims
,
w
,
p
,
s
);
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
ndims
,
w
,
p
,
s
);
#endif
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"could not set op descriptor %s"
,
cudnnGetErrorString
(
err
));
PyErr_Format
(
PyExc_RuntimeError
,
"could not set op descriptor %s"
,
cudnnGetErrorString
(
err
));
...
...
theano/sandbox/gpuarray/dnn_pool_grad.c
浏览文件 @
ef0fc56e
...
@@ -109,7 +109,12 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
...
@@ -109,7 +109,12 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
for
(
int
i
=
0
;
i
<
ndims
;
i
++
)
{
for
(
int
i
=
0
;
i
<
ndims
;
i
++
)
{
s
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
stride
,
i
));
s
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
stride
,
i
));
}
}
#if CUDNN_VERSION >= 5000
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
CUDNN_PROPAGATE_NAN
,
ndims
,
w
,
p
,
s
);
#else
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
ndims
,
w
,
p
,
s
);
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
ndims
,
w
,
p
,
s
);
#endif
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"could not set op descriptor %s"
,
cudnnGetErrorString
(
err
));
PyErr_Format
(
PyExc_RuntimeError
,
"could not set op descriptor %s"
,
cudnnGetErrorString
(
err
));
...
...
theano/sandbox/gpuarray/tests/test_dnn.py
浏览文件 @
ef0fc56e
import
logging
import
logging
from
nose.plugins.skip
import
SkipTest
from
nose.plugins.skip
import
SkipTest
from
nose_parameterized
import
parameterized
import
numpy
import
numpy
from
itertools
import
product
from
itertools
import
product
,
chain
import
theano
import
theano
from
six
import
StringIO
from
six
import
StringIO
...
@@ -18,6 +19,8 @@ from ..basic_ops import GpuAllocEmpty
...
@@ -18,6 +19,8 @@ from ..basic_ops import GpuAllocEmpty
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
test_ctx_name
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
test_ctx_name
from
.
import
test_nnet
from
.
import
test_nnet
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_FWD
def
test_dnn_conv_desc_merge
():
def
test_dnn_conv_desc_merge
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
...
@@ -393,6 +396,9 @@ def test_dnn_tag():
...
@@ -393,6 +396,9 @@ def test_dnn_tag():
class
TestDnnInferShapes
(
utt
.
InferShapeTester
):
class
TestDnnInferShapes
(
utt
.
InferShapeTester
):
border_modes
=
[
'valid'
,
'full'
,
'half'
]
conv_modes
=
[
'conv'
,
'cross'
]
def
setUp
(
self
):
def
setUp
(
self
):
super
(
TestDnnInferShapes
,
self
)
.
setUp
()
super
(
TestDnnInferShapes
,
self
)
.
setUp
()
self
.
mode
=
mode_with_gpu
self
.
mode
=
mode_with_gpu
...
@@ -427,37 +433,25 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -427,37 +433,25 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn
.
GpuDnnSoftmaxGrad
dnn
.
GpuDnnSoftmaxGrad
)
)
def
test_conv
(
self
):
def
_test_conv
(
self
,
img
,
kerns
,
out
,
img_val
,
kern_vals
,
border_mode
,
conv_mode
,
subsamples
,
algo
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
img
=
T
.
ftensor4
(
'img'
)
kerns
=
T
.
ftensor4
(
'kerns'
)
out
=
T
.
ftensor4
(
'out'
)
img_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
7
,
2
,
6
,
4
),
dtype
=
'float32'
)
kern_vals
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
8
,
2
,
4
,
3
),
dtype
=
'float32'
)
for
params
in
product
(
img_val
=
numpy
.
asarray
(
img_val
,
dtype
=
'float32'
)
[
'valid'
,
'full'
,
'half'
],
kern_vals
=
numpy
.
asarray
(
kern_vals
,
dtype
=
'float32'
)
[(
1
,
1
),
(
2
,
2
)],
[
'conv'
,
'cross'
]
for
subsample
in
subsamples
:
):
out_vals
=
numpy
.
zeros
(
out_vals
=
numpy
.
zeros
(
dnn
.
GpuDnnConv
.
get_out_shape
(
img_val
.
shape
,
kern_vals
.
shape
,
dnn
.
GpuDnnConv
.
get_out_shape
(
img_val
.
shape
,
kern_vals
.
shape
,
border_mode
=
params
[
0
]
,
border_mode
=
border_mode
,
subsample
=
params
[
1
]
),
subsample
=
subsample
),
dtype
=
'float32'
)
dtype
=
'float32'
)
desc
=
dnn
.
GpuDnnConvDesc
(
desc
=
dnn
.
GpuDnnConvDesc
(
border_mode
=
params
[
0
]
,
border_mode
=
border_mode
,
subsample
=
params
[
1
]
,
subsample
=
subsample
,
conv_mode
=
params
[
2
]
conv_mode
=
conv_mode
)(
kerns
.
shape
)
)(
kerns
.
shape
)
conv
=
dnn
.
GpuDnnConv
()(
img
,
kerns
,
out
,
desc
)
conv
=
dnn
.
GpuDnnConv
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
self
.
_compile_and_check
(
self
.
_compile_and_check
(
[
img
,
kerns
,
out
],
[
img
,
kerns
,
out
],
[
conv
],
[
conv
],
...
@@ -465,29 +459,56 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -465,29 +459,56 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn
.
GpuDnnConv
dnn
.
GpuDnnConv
)
)
def
test_conv_gradw
(
self
):
@parameterized.expand
(
chain
(
product
([
SUPPORTED_DNN_CONV_ALGO_FWD
[
0
]],
border_modes
,
conv_modes
),
product
(
SUPPORTED_DNN_CONV_ALGO_FWD
[
1
:],
[
border_modes
[
0
]],
[
conv_modes
[
0
]])),
testcase_func_name
=
utt
.
custom_name_func
)
def
test_conv
(
self
,
algo
,
border_mode
,
conv_mode
):
if
algo
==
'winograd'
and
dnn
.
version
()
<
5000
:
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
self
.
_test_conv
(
T
.
ftensor4
(
'img'
),
T
.
ftensor4
(
'kerns'
),
T
.
ftensor4
(
'out'
),
numpy
.
random
.
rand
(
7
,
2
,
8
,
4
),
numpy
.
random
.
rand
(
8
,
2
,
4
,
3
),
border_mode
,
conv_mode
,
[(
1
,
1
),
(
2
,
2
)],
algo
)
@parameterized.expand
(
product
(
border_modes
,
conv_modes
),
utt
.
custom_name_func
)
def
test_conv3d_none
(
self
,
border_mode
,
conv_mode
):
ftensor5
=
T
.
TensorType
(
dtype
=
"float32"
,
broadcastable
=
(
False
,)
*
5
)
self
.
_test_conv
(
ftensor5
(
'img'
),
ftensor5
(
'kerns'
),
ftensor5
(
'out'
),
numpy
.
random
.
rand
(
10
,
2
,
6
,
4
,
11
),
numpy
.
random
.
rand
(
8
,
2
,
4
,
3
,
1
),
border_mode
,
conv_mode
,
[(
1
,
1
,
1
),
(
2
,
2
,
2
)],
'none'
)
def
_test_conv_gradw
(
self
,
img
,
kerns
,
out
,
img_val
,
kern_vals
,
border_mode
,
conv_mode
,
subsample
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
img
=
T
.
ftensor4
(
'img'
)
kerns
=
T
.
ftensor4
(
'kerns'
)
out
=
T
.
ftensor4
(
'out'
)
img_val
=
numpy
.
asarray
(
img_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
2
,
5
,
6
,
8
)
,
img_val
,
dtype
=
'float32'
dtype
=
'float32'
)
)
kern_vals
=
numpy
.
asarray
(
kern_vals
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
2
,
1
,
5
,
6
)
,
kern_vals
,
dtype
=
'float32'
dtype
=
'float32'
)
)
for
params
in
product
(
[
'valid'
,
'full'
,
'half'
],
[(
1
,
1
)],
# strides besides (1, 1)
[
'conv'
,
'cross'
]
):
temp_img
=
img
.
dimshuffle
(
1
,
0
,
2
,
3
)
temp_img
=
img
.
dimshuffle
(
1
,
0
,
2
,
3
)
temp_kerns
=
kerns
temp_kerns
=
kerns
if
params
[
2
]
==
'conv'
:
if
conv_mode
==
'conv'
:
temp_kerns
=
temp_kerns
[:,
:,
::
-
1
,
::
-
1
]
temp_kerns
=
temp_kerns
[:,
:,
::
-
1
,
::
-
1
]
temp_kerns
=
temp_kerns
.
dimshuffle
(
1
,
0
,
2
,
3
)
temp_kerns
=
temp_kerns
.
dimshuffle
(
1
,
0
,
2
,
3
)
shape
=
(
shape
=
(
...
@@ -497,9 +518,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -497,9 +518,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
)
)
out_vals
=
numpy
.
zeros
(
shape
,
dtype
=
'float32'
)
out_vals
=
numpy
.
zeros
(
shape
,
dtype
=
'float32'
)
desc
=
dnn
.
GpuDnnConvDesc
(
desc
=
dnn
.
GpuDnnConvDesc
(
border_mode
=
params
[
0
]
,
border_mode
=
border_mode
,
subsample
=
params
[
1
]
,
subsample
=
subsample
,
conv_mode
=
params
[
2
]
conv_mode
=
conv_mode
)(
out
.
shape
)
)(
out
.
shape
)
conv_grad_w
=
dnn
.
GpuDnnConvGradW
()(
conv_grad_w
=
dnn
.
GpuDnnConvGradW
()(
temp_img
,
temp_img
,
...
@@ -514,6 +535,17 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -514,6 +535,17 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn
.
GpuDnnConvGradW
dnn
.
GpuDnnConvGradW
)
)
@parameterized.expand
(
product
(
border_modes
,
conv_modes
),
utt
.
custom_name_func
)
def
test_conv_gradw
(
self
,
border_mode
,
conv_mode
):
self
.
_test_conv_gradw
(
T
.
ftensor4
(
'img'
),
T
.
ftensor4
(
'kerns'
),
T
.
ftensor4
(
'out'
),
numpy
.
random
.
rand
(
2
,
5
,
6
,
8
),
numpy
.
random
.
rand
(
2
,
1
,
5
,
6
),
border_mode
,
conv_mode
,
(
1
,
1
))
def
test_conv_gradi
(
self
):
def
test_conv_gradi
(
self
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
...
...
theano/tests/unittest_tools.py
浏览文件 @
ef0fc56e
...
@@ -4,6 +4,7 @@ from functools import wraps
...
@@ -4,6 +4,7 @@ from functools import wraps
import
logging
import
logging
import
sys
import
sys
import
unittest
import
unittest
from
nose_parameterized
import
parameterized
from
six
import
integer_types
from
six
import
integer_types
from
six.moves
import
StringIO
from
six.moves
import
StringIO
...
@@ -31,6 +32,13 @@ except ImportError:
...
@@ -31,6 +32,13 @@ except ImportError:
_logger
=
logging
.
getLogger
(
"theano.tests.unittest_tools"
)
_logger
=
logging
.
getLogger
(
"theano.tests.unittest_tools"
)
def custom_name_func(testcase_func, param_num, param):
    """Name generator for ``nose_parameterized.parameterized.expand``.

    Builds the generated test's name by appending a sanitized,
    underscore-joined rendering of the case's positional parameters
    to the undecorated test function's name.
    """
    joined_args = "_".join(str(arg) for arg in param.args)
    return "%s_%s" % (testcase_func.__name__,
                      parameterized.to_safe_name(joined_args))
def
fetch_seed
(
pseed
=
None
):
def
fetch_seed
(
pseed
=
None
):
"""
"""
Returns the seed to use for running the unit tests.
Returns the seed to use for running the unit tests.
...
@@ -96,6 +104,7 @@ verify_grad.E_grad = T.verify_grad.E_grad
...
@@ -96,6 +104,7 @@ verify_grad.E_grad = T.verify_grad.E_grad
class
TestOptimizationMixin
(
object
):
class
TestOptimizationMixin
(
object
):
def
assertFunctionContains
(
self
,
f
,
op
,
min
=
1
,
max
=
sys
.
maxsize
):
def
assertFunctionContains
(
self
,
f
,
op
,
min
=
1
,
max
=
sys
.
maxsize
):
toposort
=
f
.
maker
.
fgraph
.
toposort
()
toposort
=
f
.
maker
.
fgraph
.
toposort
()
matches
=
[
node
for
node
in
toposort
if
node
.
op
==
op
]
matches
=
[
node
for
node
in
toposort
if
node
.
op
==
op
]
...
@@ -172,6 +181,7 @@ class T_OpContractMixin(object):
...
@@ -172,6 +181,7 @@ class T_OpContractMixin(object):
class
InferShapeTester
(
unittest
.
TestCase
):
class
InferShapeTester
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
seed_rng
()
seed_rng
()
# Take into account any mode that may be defined in a child class
# Take into account any mode that may be defined in a child class
...
@@ -311,6 +321,7 @@ def str_diagnostic(expected, value, rtol, atol):
...
@@ -311,6 +321,7 @@ def str_diagnostic(expected, value, rtol, atol):
class
WrongValue
(
Exception
):
class
WrongValue
(
Exception
):
def
__init__
(
self
,
expected_val
,
val
,
rtol
,
atol
):
def
__init__
(
self
,
expected_val
,
val
,
rtol
,
atol
):
Exception
.
__init__
(
self
)
# to be compatible with python2.4
Exception
.
__init__
(
self
)
# to be compatible with python2.4
self
.
val1
=
expected_val
self
.
val1
=
expected_val
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论