testgroup / pytensor · Commits

Commit 5561f5d6, authored Oct 18, 2017 by Frédéric Bastien, committed by GitHub.
Parents: ecc0c2b3, a5b08f9a

Merge pull request #5932 from notoraptor/tests-for-cudnn

Add a Python script to help run more exhaustive cuDNN algorithms tests

Showing 5 changed files with 1477 additions and 7 deletions (+1477 -7):
theano/gof/type.py                       +2    -2
theano/gpuarray/cudnn_defs.py            +178  -3
theano/gpuarray/tests/check_dnn_conv.py  +1136 -0
theano/gpuarray/tests/run_dnn_conv.py    +159  -0
theano/gpuarray/tests/test_dnn.py        +2    -2
theano/gof/type.py

```diff
@@ -977,9 +977,9 @@ class EnumType(Type, dict):
     def get_aliases(self):
         """
-        Return the list of all aliases in this enumeration.
+        Return the sorted tuple of all aliases in this enumeration.
         """
-        return self.aliases.keys()
+        return tuple(sorted(self.aliases.keys()))

     def __repr__(self):
         names_to_aliases = {constant_name: '' for constant_name in self}
```
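The point of this change is a deterministic, reusable return value: `dict.keys()` order is arbitrary on Python 2 and returns a view object on Python 3, while a sorted tuple is stable and hashable. A minimal sketch of the difference (the `aliases` mapping here is illustrative, not the real enum contents):

```python
# Illustrative alias mapping; EnumType stores its aliases internally.
aliases = {'small': 'ALGO_1', 'none': 'ALGO_0', 'fft': 'ALGO_FFT'}

old_result = aliases.keys()                 # order is not guaranteed
new_result = tuple(sorted(aliases.keys()))  # stable and hashable

assert new_result == ('fft', 'none', 'small')
# The tuple form also permits expressions used later in this commit, such as
# CuDNNV51.deterministic_bwd_filter_algorithms + ('fft_tiling',).
```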
theano/gpuarray/cudnn_defs.py

```diff
@@ -9,9 +9,9 @@ for a given cuDNN version.
 
 Currently supported cuDNN APIs:
 
- - v5.1
- - v6.0
- - v7.0
+ - v5.1*
+ - v6.0*
+ - v7.0*
 
 """
@@ -19,6 +19,29 @@ from __future__ import absolute_import, print_function, division
 
 from theano.gof import CEnumType
 
+HALF, FLOAT, DOUBLE = ('float16', 'float32', 'float64')
+
+TRUE_HALF_CONFIG = (HALF, HALF)
+PSEUDO_HALF_CONFIG = (HALF, FLOAT)
+FLOAT_CONFIG = (FLOAT, FLOAT)
+DOUBLE_CONFIG = (DOUBLE, DOUBLE)
+
+
+def is_true_half_config(dtype, precision):
+    return dtype == precision == HALF
+
+
+def is_pseudo_half_config(dtype, precision):
+    return dtype == HALF and precision == FLOAT
+
+
+def is_float_config(dtype, precision):
+    return dtype == precision == FLOAT
+
+
+def is_double_config(dtype, precision):
+    return dtype == precision == DOUBLE
+
+
 # NB: Some cuDNN algorithms are listed in cuDNN enums but not implemented.
 # We still register them here because we try to exactly copy cuDNN enums
 # in Python side, but they will have no aliases associated, to help
@@ -51,6 +74,8 @@ class CuDNNV51(object):
 
     conv3d_fwd_algorithms = ('none', 'small', 'fft_tiling')
 
+    deterministic_fwd_algorithms = cudnnConvolutionFwdAlgo_t.get_aliases()
+
     cudnnConvolutionBwdFilterAlgo_t = CEnumType(('CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0', 'none'),
                                                 ('CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1', 'deterministic'),
                                                 ('CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT', 'fft'),
@@ -61,6 +86,8 @@ class CuDNNV51(object):
 
     conv3d_bwd_filter_algorithms = ('none', 'small')
 
+    deterministic_bwd_filter_algorithms = ('deterministic', 'fft', 'winograd_non_fused')
+
     cudnnConvolutionBwdDataAlgo_t = CEnumType(('CUDNN_CONVOLUTION_BWD_DATA_ALGO_0', 'none'),
                                               ('CUDNN_CONVOLUTION_BWD_DATA_ALGO_1', 'deterministic'),
                                               ('CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT', 'fft'),
@@ -72,6 +99,8 @@ class CuDNNV51(object):
 
     conv3d_bwd_data_algorithms = ('none', 'deterministic', 'fft_tiling')
 
+    deterministic_bwd_data_algorithms = ('deterministic', 'fft', 'fft_tiling', 'winograd', 'winograd_non_fused')
+
     cudnnPoolingMode_t = CEnumType(('CUDNN_POOLING_MAX', 'max'),
                                    ('CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING', 'average_inc_pad'),
                                    ('CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING', 'average_exc_pad'),
@@ -93,6 +122,107 @@ class CuDNNV51(object):
     # empty list of enum to don't crash with cudnn 5
     cudnnReduceTensorOp_t = CEnumType()
 
+    def get_supported_dtype_configs(self, check_runtime=None):
+        """
+        Return the tuple of data type configurations supported by this version of cuDNN.
+        This is currently convenient for all supported cuDNN versions, as Theano does not
+        yet support new data types (like INT8, INT8x4, etc.).
+
+        ``check_runtime`` may be a function that tests if a data type configuration is supported::
+
+            is_supported = check_runtime(dtype, precision)
+
+        .. warning::
+
+            From documentation for cudnnConvolutionForward (for both v5.1 and v6):
+
+            .. code-block::
+
+                TRUE_HALF_CONFIG is only supported on architectures with true fp16 support
+                (compute capability 5.3 and 6.0)
+
+            This seems to be a general remark about f16 support (not only for FWD).
+            It can be checked at runtime only.
+
+        """
+        if check_runtime is None or check_runtime(*TRUE_HALF_CONFIG):
+            return (TRUE_HALF_CONFIG, PSEUDO_HALF_CONFIG, FLOAT_CONFIG, DOUBLE_CONFIG)
+        return (PSEUDO_HALF_CONFIG, FLOAT_CONFIG, DOUBLE_CONFIG)
+
+    def fwd_algo_supports_dtype_config(self, algo, dtype, precision, ndim):
+        algorithms = self.cudnnConvolutionFwdAlgo_t
+        algo = algorithms.fromalias(algo)
+        if algo == algorithms.CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM:
+            return not is_true_half_config(dtype, precision)
+        if algo == algorithms.CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM:
+            return ndim == 2 or not is_true_half_config(dtype, precision)
+        if algo == algorithms.CUDNN_CONVOLUTION_FWD_ALGO_GEMM:
+            return ndim == 2 and not is_true_half_config(dtype, precision)
+        # CUDNN_CONVOLUTION_FWD_ALGO_DIRECT: not implemented.
+        if algo == algorithms.CUDNN_CONVOLUTION_FWD_ALGO_FFT:
+            return ndim == 2 and (is_pseudo_half_config(dtype, precision) or
+                                  is_float_config(dtype, precision))
+        if algo == algorithms.CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING:
+            if ndim == 2:
+                return is_pseudo_half_config(dtype, precision) or is_float_config(dtype, precision)
+            if ndim == 3:
+                return not is_true_half_config(dtype, precision)
+        if algo == algorithms.CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD:
+            return ndim == 2 and (is_pseudo_half_config(dtype, precision) or
+                                  is_float_config(dtype, precision))
+        if algo == algorithms.CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED:
+            # NB: "If wDesc's filter (height, width) is (5,5), data type config TRUE_HALF_CONFIG is not supported".
+            # We could not check it before being in C code.
+            return ndim == 2 and not is_double_config(dtype, precision)
+        return False
+
+    def bwd_filter_algo_supports_dtype_config(self, algo, dtype, precision, ndim):
+        # NB: Theano does not support float16 precision anymore for backward cuDNN convolutions.
+        if is_true_half_config(dtype, precision):
+            return False
+        algorithms = self.cudnnConvolutionBwdFilterAlgo_t
+        algo = algorithms.fromalias(algo)
+        if algo == algorithms.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0:
+            return not is_true_half_config(dtype, precision)
+        if algo == algorithms.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1:
+            return ndim == 2
+        if algo == algorithms.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT:
+            return ndim == 2 and (is_pseudo_half_config(dtype, precision) or
+                                  is_float_config(dtype, precision))
+        if algo == algorithms.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3:
+            return not is_true_half_config(dtype, precision)
+        if algo == algorithms.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED:
+            # NB: "If wDesc's filter (height, width) is (5,5), data type config TRUE_HALF_CONFIG is not supported".
+            # We could not check it before being in C code.
+            return ndim == 2 and not is_double_config(dtype, precision)
+        return False
+
+    def bwd_data_algo_supports_dtype_config(self, algo, dtype, precision, ndim):
+        # NB: Theano does not support float16 precision anymore for backward cuDNN convolutions.
+        if is_true_half_config(dtype, precision):
+            return False
+        algorithms = self.cudnnConvolutionBwdDataAlgo_t
+        algo = algorithms.fromalias(algo)
+        if algo == algorithms.CUDNN_CONVOLUTION_BWD_DATA_ALGO_0:
+            return not is_true_half_config(dtype, precision)
+        if algo == algorithms.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1:
+            # CUDNN_CONVOLUTION_BWD_DATA_ALGO_1: all data type configs supported.
+            # NB: Let's avoid float16 precision, as some strange errors may be encountered
+            # with that precision (see https://github.com/Theano/Theano/pull/5932/).
+            return not is_true_half_config(dtype, precision)
+        if algo == algorithms.CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT:
+            return ndim == 2 and (is_pseudo_half_config(dtype, precision) or
+                                  is_float_config(dtype, precision))
+        if algo == algorithms.CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING:
+            if ndim == 2:
+                return is_pseudo_half_config(dtype, precision) or is_float_config(dtype, precision)
+            if ndim == 3:
+                return not is_true_half_config(dtype, precision)
+        if algo == algorithms.CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD:
+            return ndim == 2 and (is_pseudo_half_config(dtype, precision) or
+                                  is_float_config(dtype, precision))
+        if algo == algorithms.CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED:
+            # NB: "If wDesc's filter (height, width) is (5,5), data type config TRUE_HALF_CONFIG is not supported".
+            # We could not check it before being in C code.
+            return ndim == 2 and not is_double_config(dtype, precision)
+        return False
+
 
 class CuDNNV6(CuDNNV51):
     version = 6
@@ -123,6 +253,8 @@ class CuDNNV6(CuDNNV51):
                                                 ('CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING', 'fft_tiling'),
                                                 ctype='cudnnConvolutionBwdFilterAlgo_t')
 
+    deterministic_bwd_filter_algorithms = CuDNNV51.deterministic_bwd_filter_algorithms + ('fft_tiling',)
+
     cudnnReduceTensorOp_t = CEnumType(('CUDNN_REDUCE_TENSOR_ADD', 'add'),
                                       ('CUDNN_REDUCE_TENSOR_MUL', 'mul'),
                                       ('CUDNN_REDUCE_TENSOR_MIN', 'minimum'),
@@ -133,6 +265,49 @@ class CuDNNV6(CuDNNV51):
                                       ('CUDNN_REDUCE_TENSOR_NORM2', 'norm2'),
                                       ctype='cudnnReduceTensorOp_t')
 
+    def fwd_algo_supports_dtype_config(self, algo, dtype, precision, ndim):
+        is_supported = super(CuDNNV6, self).fwd_algo_supports_dtype_config(algo, dtype,
+                                                                           precision, ndim)
+        if not is_supported:
+            algorithms = self.cudnnConvolutionFwdAlgo_t
+            algo = algorithms.fromalias(algo)
+            if algo == algorithms.CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING:
+                # NB: For cuDNN V6:
+                # "Data Type Config Support: PSEUDO_HALF_CONFIG, FLOAT_CONFIG
+                # (DOUBLE_CONFIG is also supported when the task can be handled by 1D FFT,
+                # ie, one of the filter dimension, width or height is 1)"
+                # Could be checked only in C code. By default, let's allow DOUBLE_CONFIG.
+                return ndim == 2 and (is_pseudo_half_config(dtype, precision) or
+                                      is_float_config(dtype, precision) or
+                                      is_double_config(dtype, precision))
+        return is_supported
+
+    def bwd_filter_algo_supports_dtype_config(self, algo, dtype, precision, ndim):
+        is_supported = super(CuDNNV6, self).bwd_filter_algo_supports_dtype_config(algo, dtype,
+                                                                                  precision, ndim)
+        if not is_supported:
+            algorithms = self.cudnnConvolutionBwdFilterAlgo_t
+            algo = algorithms.fromalias(algo)
+            if algo == algorithms.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING:
+                return ndim == 2 and (is_pseudo_half_config(dtype, precision) or
+                                      is_float_config(dtype, precision) or
+                                      is_double_config(dtype, precision))
+        return is_supported
+
+    def bwd_data_algo_supports_dtype_config(self, algo, dtype, precision, ndim):
+        is_supported = super(CuDNNV6, self).bwd_data_algo_supports_dtype_config(algo, dtype,
+                                                                                precision, ndim)
+        if not is_supported:
+            algorithms = self.cudnnConvolutionBwdDataAlgo_t
+            algo = algorithms.fromalias(algo)
+            if algo == algorithms.CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING:
+                # NB: For cuDNN V6:
+                # "Data Type Config Support: PSEUDO_HALF_CONFIG, FLOAT_CONFIG
+                # (DOUBLE_CONFIG is also supported when the task can be handled by 1D FFT,
+                # ie, one of the filter dimension, width or height is 1)"
+                # Could be checked only in C code. By default, let's allow DOUBLE_CONFIG.
+                return ndim == 2 and (is_pseudo_half_config(dtype, precision) or
+                                      is_float_config(dtype, precision) or
+                                      is_double_config(dtype, precision))
+        return is_supported
+
 
 class CuDNNV7(CuDNNV6):
     version = 7
```
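The new `*_algo_supports_dtype_config` predicates let a caller filter algorithm aliases by data type configuration before building test cases. A minimal sketch of the intended use, mirroring calls that appear in `check_dnn_conv.py` below (it assumes a GPU-enabled Theano with cuDNN installed):

```python
from theano.gpuarray import cudnn_defs
from theano.gpuarray.dnn import version

# Pick the definitions matching the installed cuDNN (CuDNNV51, CuDNNV6 or CuDNNV7).
cudnn = cudnn_defs.get_definitions(version(raises=False))

# Keep only forward algorithms documented to work for 2D convolutions in FLOAT_CONFIG.
usable_fwd = [algo for algo in cudnn.cudnnConvolutionFwdAlgo_t.get_aliases()
              if cudnn.fwd_algo_supports_dtype_config(algo, 'float32', 'float32', 2)]
print(usable_fwd)
```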
theano/gpuarray/tests/check_dnn_conv.py (new file)

```python
#!/usr/bin/env python
# Without args, this script executes all its tests like `nosetests -vs`:
#     python check_dnn_conv.py
# If there is only one arg `infos`, this script prints some infos about
# supported algorithms and data type configurations for the current GPU and cuDNN version:
#     python check_dnn_conv.py infos
# If there is only one arg `list`, this script prints all test cases without running them:
#     python check_dnn_conv.py list
# Else, any arg will be directly passed to nosetests:
#     python check_dnn_conv.py -xvs  # nosetests: verbose mode, don't capture output, exit at first error.
from __future__ import absolute_import, print_function, division

import math
import sys
from itertools import product, chain

import nose
import numpy as np
from nose.plugins.skip import SkipTest

import theano
import theano.tests.unittest_tools as utt
from theano.compat import ifilter
from theano.compile.ops import shape_i_op
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_RUNTIME
from theano.gpuarray import cudnn_defs
from theano.gpuarray.basic_ops import (infer_context_name, as_gpuarray_variable,
                                       gpu_contiguous, GpuAllocEmpty)
from theano.gpuarray.dnn import (GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradW, GpuDnnConvGradI,
                                 version, get_precision)
from theano.gpuarray.tests.config import mode_with_gpu, ref_cast
from theano.tensor.nnet.abstract_conv import get_conv_output_shape, assert_conv_shape
from theano.tensor.nnet.corr import CorrMM, CorrMM_gradInputs, CorrMM_gradWeights
from theano.tensor.nnet.corr3d import Corr3dMM, Corr3dMM_gradInputs, Corr3dMM_gradWeights
from theano.tensor.opt import Assert


# We provide a special implementation of dnn_conv, dnn_gradweight and dnn_gradinput
# that supports alpha, beta and out as parameters.

def dnn_conv(img, kerns, alpha=1, beta=0, out=None, border_mode='valid', subsample=(1, 1),
             dilation=(1, 1), conv_mode='conv', algo=None, precision=None):
    ctx_name = infer_context_name(img, kerns)
    img = gpu_contiguous(as_gpuarray_variable(img, ctx_name))
    kerns = gpu_contiguous(as_gpuarray_variable(kerns, ctx_name))
    precision = get_precision(precision, [img, kerns])
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
                          conv_mode=conv_mode, precision=precision)(kerns.shape)
    desc_op = desc.owner.op
    # We can use Shape_i and bypass the infer_shape here as this is on
    # the input of the node and it will always be present.
    ishape = [shape_i_op(i)(img) for i in range(img.ndim)]
    kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
    out_shp = get_conv_output_shape(ishape, kshape, desc_op.border_mode, desc_op.subsample,
                                    filter_dilation=dilation)
    out_shp = assert_conv_shape(out_shp)
    if beta == 0:
        real_out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
    else:
        assert out is not None
        out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
        check = Assert('GpuDnnConv: given output (for beta not null) does not have expected shape')
        real_out = check(out, theano.tensor.all(theano.tensor.eq(out.shape, out_shp)))
    return GpuDnnConv(algo=algo)(img, kerns, real_out, desc, alpha, beta)


def dnn_gradweight(img, topgrad, kerns_shp, alpha=1, beta=0, out=None, border_mode='valid',
                   subsample=(1, 1), dilation=(1, 1), conv_mode='conv', algo=None, precision=None):
    ctx_name = infer_context_name(img, topgrad)
    img = gpu_contiguous(as_gpuarray_variable(img, ctx_name))
    topgrad = gpu_contiguous(as_gpuarray_variable(topgrad, ctx_name))
    kerns_shp = theano.tensor.as_tensor_variable(kerns_shp)
    precision = get_precision(precision, [img, topgrad], for_grad=True)
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
                          conv_mode=conv_mode, precision=precision)(kerns_shp)
    if beta == 0:
        real_out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*kerns_shp)
    else:
        assert out is not None
        out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
        check = Assert('GpuDnnConvGradW: given output (for beta not null) does not have expected shape')
        real_out = check(out, theano.tensor.all(theano.tensor.eq(out.shape, kerns_shp)))
    return GpuDnnConvGradW(algo=algo)(img, topgrad, real_out, desc, alpha, beta)


def dnn_gradinput(kerns, topgrad, img_shp, alpha=1, beta=0, out=None, border_mode='valid',
                  subsample=(1, 1), dilation=(1, 1), conv_mode='conv', algo=None, precision=None):
    ctx_name = infer_context_name(kerns, topgrad)
    kerns = gpu_contiguous(as_gpuarray_variable(kerns, ctx_name))
    topgrad = gpu_contiguous(as_gpuarray_variable(topgrad, ctx_name))
    img_shp = theano.tensor.as_tensor_variable(img_shp)
    precision = get_precision(precision, [kerns, topgrad], for_grad=True)
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
                          conv_mode=conv_mode, precision=precision)(kerns.shape)
    if beta == 0:
        real_out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp)
    else:
        assert out is not None
        out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
        check = Assert('GpuDnnConvGradI: given output (for beta not null) does not have expected shape')
        real_out = check(out, theano.tensor.all(theano.tensor.eq(out.shape, img_shp)))
    return GpuDnnConvGradI(algo=algo)(kerns, topgrad, real_out, desc, alpha, beta)
```
```python
def check_dtype_config_support(dtype, precision):
    # We use FWD 2D to check it.
    # Based on documentation, algo small (CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM)
    # should support all configurations, for v5.1, v6 and v7.
    inputs = theano.shared(np.zeros((1, 1, 2, 2), dtype=dtype))
    filters = theano.shared(np.zeros((1, 1, 2, 2), dtype=dtype))
    conv = dnn_conv(inputs, filters, precision=precision, algo='small')
    f = theano.function([], conv, mode=mode_with_gpu)
    try:
        f()
    except RuntimeError as e:
        assert 'CUDNN_STATUS_ARCH_MISMATCH' in e.message
        return False
    return True


cudnn = cudnn_defs.get_definitions(version(raises=False))
```
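`check_dtype_config_support` is written to serve as the `check_runtime` probe of the new `get_supported_dtype_configs` API, so that TRUE_HALF_CONFIG is kept only when the GPU really has fp16 arithmetic. A short sketch (GPU required):

```python
# Drops TRUE_HALF_CONFIG when the current GPU lacks true fp16 support.
dtype_configs = cudnn.get_supported_dtype_configs(check_dtype_config_support)
for dtype, precision in dtype_configs:
    print(dtype, precision)
```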
```python
class ConvCase:
    """
    Helper class to describe a special test case quickly.
    This handles only 2D and 3D cases.
    """

    FWD, GRADINPUT, GRADWEIGHT = 0, 1, 2

    def __init__(self, type, inputs_shape, filters_shape,
                 algo=None, dtype=None, precision=None,
                 subsample=None, dilation=None, border_mode='valid', conv_mode='conv',
                 alpha=1, beta=0, should_fail=False):
        assert type in (ConvCase.FWD, ConvCase.GRADINPUT, ConvCase.GRADWEIGHT)
        assert len(inputs_shape) == len(filters_shape) in (4, 5)
        ndim = len(inputs_shape) - 2
        if dtype is None:
            dtype = theano.config.floatX
        if precision is None:
            precision = theano.config.floatX
        if subsample is None:
            subsample = (1,) * ndim
        if dilation is None:
            dilation = (1,) * ndim
        assert dtype in ('float16', 'float32', 'float64')
        assert precision in ('float16', 'float32', 'float64')
        assert len(subsample) == len(dilation) == ndim
        assert (border_mode in ('valid', 'full', 'half') or
                (isinstance(border_mode, (list, tuple)) and len(border_mode) == ndim))
        assert conv_mode in ('conv', 'cross')
        assert alpha != 0

        self.type = type
        self.ndim = ndim
        self.algo = algo
        self.inputs_shape = inputs_shape
        self.filters_shape = filters_shape
        self.dtype = dtype
        self.precision = precision
        self.subsample = subsample
        self.dilation = dilation
        self.border_mode = border_mode
        self.conv_mode = conv_mode
        self.alpha = alpha
        self.beta = beta
        self.should_fail = bool(should_fail)

    def is_fwd(self):
        return self.type == ConvCase.FWD

    def is_bwd_filter(self):
        return self.type == ConvCase.GRADWEIGHT

    def is_bwd_data(self):
        return self.type == ConvCase.GRADINPUT

    def get_case(self):
        return (self.algo, self.dtype, self.precision,
                (self.inputs_shape, self.filters_shape,
                 self.subsample, self.dilation,
                 self.border_mode, self.conv_mode,
                 self.alpha, self.beta))

    @staticmethod
    def fwd(*args, **kwargs):
        return ConvCase(ConvCase.FWD, *args, **kwargs)

    @staticmethod
    def bwd_filter(*args, **kwargs):
        return ConvCase(ConvCase.GRADWEIGHT, *args, **kwargs)

    @staticmethod
    def bwd_data(*args, **kwargs):
        return ConvCase(ConvCase.GRADINPUT, *args, **kwargs)
```
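Special cases are declared through the three static constructors, with `should_fail` marking a case that is expected to raise. A sketch using one of the entries from `TestDnnConv2D.special_cases` further below (it assumes the module-level `cudnn` object defined above):

```python
# A bwd-filter case expected to fail on cuDNN <= 6.
case = ConvCase.bwd_filter(algo='deterministic', dtype='float32', precision='float32',
                           inputs_shape=(1, 1, 541211, 10), filters_shape=(50, 1, 3, 10),
                           border_mode=(1, 0), should_fail=(cudnn.version <= 6))
assert case.is_bwd_filter()
algo, dtype, precision, parameters = case.get_case()
```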
```python
class ConvCaseGenerator:
    """
    Main class used to generate test cases.
    This handles only 2D and 3D cases.
    """

    def _as_tuple_of_tuples(self, iterable):
        return tuple(tuple(sequence) for sequence in iterable)

    def __init__(self, ndim, alpha=2, beta=-3, batch_size=2, input_channels=3, inputs_sizes=None,
                 output_channels=2, filters_sizes=None, subsamples=None, dilations=None,
                 borders=None, with_border_valid=True, with_border_half=True, with_border_full=True):
        self.ndim = int(ndim)
        self.alpha = float(alpha)
        self.beta = float(beta)
        self.batch_size = int(batch_size)
        self.input_channels = int(input_channels)
        self.output_channels = int(output_channels)
        assert self.ndim in (2, 3)
        assert self.alpha != 0
        assert self.batch_size > 0
        assert self.input_channels > 0
        assert self.output_channels > 0
        # NB: it is quite arbitrary to choose default values for inputs sizes and filters sizes.
        # Here, we just put some values that may generate errors in some cases, but that should be OK for other cases.
        # For instance, input size 300 is > 256, that is a limit for certain algorithms (cf. documentation).
        # Filter size 40 is > 32 and > 16, that are limits for certain algorithms (cf. documentation).
        # We should either manually specify sizes, or give an appropriate filter to this generator
        # before testing values (see `self.get_cases()`).
        if inputs_sizes is None:
            inputs_sizes = ((5,) * self.ndim, (300, 5) + (2,) * (self.ndim - 2))
        if filters_sizes is None:
            filters_sizes = ((4,) * self.ndim, (40, 4) + (2,) * (self.ndim - 2))
        if borders is None:
            borders = ((1,) * self.ndim, tuple(range(1, self.ndim + 1)))
        if subsamples is None:
            subsamples = ((1,) * self.ndim, tuple(range(1, self.ndim + 1)))
        if dilations is None:
            dilations = ((1,) * self.ndim,)
            if cudnn.version >= 6:
                dilations += (tuple(range(1, self.ndim + 1)),)
        for sequence_list in (inputs_sizes, filters_sizes, borders, subsamples, dilations):
            assert (isinstance(sequence_list, (tuple, list)) and
                    all(isinstance(sequence, (tuple, list)) and len(sequence) == self.ndim
                        for sequence in sequence_list)), (self.ndim, sequence_list)
        self.auto_borders = tuple()
        if with_border_valid:
            self.auto_borders += ('valid',)
        if with_border_half:
            self.auto_borders += ('half',)
        if with_border_full:
            self.auto_borders += ('full',)
        self.inputs_sizes = self._as_tuple_of_tuples(inputs_sizes)
        self.filters_sizes = self._as_tuple_of_tuples(filters_sizes)
        self.borders = self._as_tuple_of_tuples(borders)
        self.subsamples = self._as_tuple_of_tuples(subsamples)
        self.dilations = self._as_tuple_of_tuples(dilations)

    @staticmethod
    def get_if_valid_conv_output_shape(case_tuple):
        # Filter function to keep only cases that produce valid convolution output shapes.
        out_shp = get_conv_output_shape(case_tuple[0],  # input shape
                                        case_tuple[1],  # filter shape
                                        case_tuple[4],  # border mode
                                        case_tuple[2],  # subsample
                                        case_tuple[3])  # dilation
        try:
            return assert_conv_shape(out_shp)
        except ValueError:
            return False

    def get_cases(self, filter=None):
        # Generate an iterator of tuples with format:
        # (input shape, filter shape, subsample, dilation, border mode, convolution mode, alpha, beta)
        # filter may be a callable that gets one tuple (with the format specified above) and returns
        # a boolean, so that a tuple is kept only if filter(tuple) is True.
        all_batch_sizes = (self.batch_size,)
        all_input_channels = (self.input_channels,)
        all_input_sizes = self.inputs_sizes
        all_output_channels = (self.output_channels,)
        all_filter_sizes = self.filters_sizes
        all_subsamples = self.subsamples
        all_dilations = self.dilations
        all_border_modes = self.auto_borders + self.borders
        all_conv_modes = ('conv', 'cross')
        all_alphas = (self.alpha,)
        all_betas = (0,) if self.beta == 0 else (0, self.beta)
        all_input_shapes = ((bs, ic) + ins
                            for bs in all_batch_sizes
                            for ic in all_input_channels
                            for ins in all_input_sizes)
        all_filter_shapes = ((oc, ic) + fis
                             for oc in all_output_channels
                             for ic in all_input_channels
                             for fis in all_filter_sizes)
        if callable(filter):
            def local_filter(case_tuple):
                return (ConvCaseGenerator.get_if_valid_conv_output_shape(case_tuple) and
                        filter(case_tuple))
        else:
            local_filter = ConvCaseGenerator.get_if_valid_conv_output_shape
        return ifilter(local_filter,
                       product(all_input_shapes, all_filter_shapes, all_subsamples, all_dilations,
                               all_border_modes, all_conv_modes, all_alphas, all_betas))
```
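`get_cases` yields the full Cartesian product of shapes and modes, pre-filtered to combinations with a valid convolution output shape; an optional callable narrows it further. A sketch that keeps only 2D cases without subsampling (the predicate indexes the case tuple in the order documented above):

```python
gen = ConvCaseGenerator(ndim=2)

def no_subsample(case_tuple):
    # (input shape, filter shape, subsample, dilation,
    #  border mode, conv mode, alpha, beta)
    return case_tuple[2] == (1, 1)

for case in gen.get_cases(filter=no_subsample):
    print(case)
```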
```python
class ConvCaseGeneratorChain:
    """
    Helper class to concatenate many conv case generators.
    """

    def __init__(self, *conv_case_generators):
        assert all(isinstance(g, ConvCaseGenerator) for g in conv_case_generators)
        self.generators = conv_case_generators

    def get_cases(self, filter=None):
        return chain(*[generator.get_cases(filter) for generator in self.generators])


class CuDNNV51ConvCaseGenerator(object):
    """
    Helper class to generate specific test cases for every algorithm supported by cuDNN V5.1.
    Same class exists for cuDNN V6.0 (see below).
    This should help avoid test cases that are intended to fail according to cuDNN documentation.
    """
    NONE = 'none'
    FFT = 'fft'
    FFT_TILING = 'fft_tiling'
    WINOGRAD = 'winograd'
    WINOGRAD_NON_FUSED = 'winograd_non_fused'

    # Protected interface.

    def _dilations(self, ndim):
        return [(1,) * ndim]

    def _fwd_fft(self, ndim):
        inputs_sizes = [(10,) * ndim, (240, 5) + (2,) * (ndim - 2)]
        filters_sizes = [tuple(range(9, 9 - ndim, -1))]
        subsamples = [(1,) * ndim]
        return ConvCaseGenerator(ndim=ndim, inputs_sizes=inputs_sizes, filters_sizes=filters_sizes,
                                 subsamples=subsamples, dilations=self._dilations(ndim))

    def _fwd_fft_tiling(self, ndim, dtype, precision):
        if ndim == 2:
            filters_sizes = [(32, 5)]
        if ndim == 3:
            filters_sizes = [(16, 5, 5)]
        subsamples = [(1,) * ndim]
        return ConvCaseGenerator(ndim=ndim, filters_sizes=filters_sizes, subsamples=subsamples,
                                 dilations=self._dilations(ndim))

    def _fwd_winograd(self, ndim):
        filters_sizes = [(3,) * ndim]
        subsamples = [(1,) * ndim]
        return ConvCaseGenerator(ndim=ndim, filters_sizes=filters_sizes, subsamples=subsamples,
                                 dilations=self._dilations(ndim))

    def _fwd_winograd_non_fused(self, ndim, dtype, precision):
        filters_sizes = [(3,) * ndim]
        if not (dtype == precision == 'float16'):
            filters_sizes += [(5,) * ndim]
        subsamples = [(1,) * ndim]
        return ConvCaseGenerator(ndim=ndim, filters_sizes=filters_sizes, subsamples=subsamples,
                                 dilations=self._dilations(ndim))

    def _gw_fft(self, ndim):
        return self._fwd_fft(ndim)

    def _gw_winograd_non_fused(self, ndim, dtype, precision):
        return self._fwd_winograd_non_fused(ndim, dtype, precision)

    def _gi_fft(self, ndim):
        return self._fwd_fft(ndim)

    def _gi_fft_tiling(self, ndim, dtype, precision):
        return self._fwd_fft_tiling(ndim, dtype, precision)

    def _gi_winograd(self, ndim):
        return self._fwd_winograd(ndim)

    def _gi_winograd_non_fused(self, ndim, dtype, precision):
        return self._fwd_winograd_non_fused(ndim, dtype, precision)

    def _fwd_runtime(self, ndim, dtype, precision):
        return ConvCaseGenerator(ndim=ndim, dilations=self._dilations(ndim))

    def _gw_runtime(self, ndim, dtype, precision):
        return self._fwd_runtime(ndim, dtype, precision)

    def _gi_runtime(self, ndim, dtype, precision):
        return self._fwd_runtime(ndim, dtype, precision)

    # Public interface.

    def fwd(self, algo, ndim, dtype, precision):
        if algo == self.FFT:
            return self._fwd_fft(ndim)
        if algo == self.FFT_TILING:
            return self._fwd_fft_tiling(ndim, dtype, precision)
        if algo == self.WINOGRAD:
            return self._fwd_winograd(ndim)
        if algo == self.WINOGRAD_NON_FUSED:
            return self._fwd_winograd_non_fused(ndim, dtype, precision)
        if algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
            return self._fwd_runtime(ndim, dtype, precision)
        return ConvCaseGenerator(ndim=ndim, dilations=self._dilations(ndim))

    def gw(self, algo, ndim, dtype, precision):
        if algo == self.FFT:
            return self._gw_fft(ndim)
        if algo == self.WINOGRAD_NON_FUSED:
            return self._gw_winograd_non_fused(ndim, dtype, precision)
        if algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
            return self._gw_runtime(ndim, dtype, precision)
        return ConvCaseGenerator(ndim=ndim, dilations=self._dilations(ndim))

    def gi(self, algo, ndim, dtype, precision):
        if algo == self.FFT:
            return self._gi_fft(ndim)
        if algo == self.FFT_TILING:
            return self._gi_fft_tiling(ndim, dtype, precision)
        if algo == self.WINOGRAD:
            return self._gi_winograd(ndim)
        if algo == self.WINOGRAD_NON_FUSED:
            return self._gi_winograd_non_fused(ndim, dtype, precision)
        if algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
            return self._gi_runtime(ndim, dtype, precision)
        return ConvCaseGenerator(ndim=ndim, dilations=self._dilations(ndim))


class CuDNNV6ConvCaseGenerator(CuDNNV51ConvCaseGenerator):
    def _fwd_none(self, ndim):
        # All dilations allowed.
        return ConvCaseGenerator(ndim=ndim)

    def _fwd_fft_tiling(self, ndim, dtype, precision):
        if ndim == 2:
            subsamples = [(1, 1)]
            # wDesc's filter height must be greater than convDesc's zero-padding height.
            # wDesc's filter width must be greater than convDesc's zero-padding width.
            generators = []
            if (dtype, precision) != ('float64', 'float64'):
                # Filter sizes with every dimension != 1 is not supported for DOUBLE_CONFIG.
                filters_sizes = [(32, 5), (10, 10)]
                borders = [(1, 1), (6, 4)]
                generators += [ConvCaseGenerator(ndim=ndim, dilations=self._dilations(ndim),
                                                 subsamples=subsamples,
                                                 filters_sizes=filters_sizes, borders=borders)]
            filters_sizes = [(256, 1), (5, 1)]
            borders = [(1, 0), (2, 0)]
            generators += [ConvCaseGenerator(ndim=ndim, dilations=self._dilations(ndim),
                                             subsamples=subsamples,
                                             filters_sizes=filters_sizes, borders=borders)]
            return ConvCaseGeneratorChain(*generators)
        if ndim == 3:
            return super(CuDNNV6ConvCaseGenerator, self)._fwd_fft_tiling(ndim, dtype, precision)

    def _gw_none(self, ndim):
        return self._fwd_none(ndim)

    def _gw_fft_tiling(self, ndim):
        inputs_sizes = [(247, 1), (20, 1)]
        filters_sizes = [(3, 1), (10, 1)]
        subsamples = [(1,) * ndim]
        borders = [(1, 0), (2, 0)]
        return ConvCaseGenerator(ndim=ndim, inputs_sizes=inputs_sizes, filters_sizes=filters_sizes,
                                 subsamples=subsamples, borders=borders,
                                 dilations=self._dilations(ndim))

    def _gi_none(self, ndim):
        return self._fwd_none(ndim)

    def _fwd_runtime(self, ndim, dtype, precision):
        if ndim == 2 and dtype == precision == 'float16':
            return ConvCaseGenerator(ndim=ndim, dilations=self._dilations(ndim))
        return super(CuDNNV6ConvCaseGenerator, self)._fwd_runtime(ndim, dtype, precision)

    def _gw_runtime(self, ndim, dtype, precision):
        if ndim == 2 and dtype == precision == 'float16':
            return ConvCaseGenerator(ndim=ndim, dilations=self._dilations(ndim))
        return super(CuDNNV6ConvCaseGenerator, self)._gw_runtime(ndim, dtype, precision)

    def _gi_runtime(self, ndim, dtype, precision):
        if ndim == 2 and dtype == precision == 'float16':
            return ConvCaseGenerator(ndim=ndim, dilations=self._dilations(ndim))
        return super(CuDNNV6ConvCaseGenerator, self)._gi_runtime(ndim, dtype, precision)

    def fwd(self, algo, ndim, dtype, precision):
        if algo == self.NONE:
            return self._fwd_none(ndim)
        return super(CuDNNV6ConvCaseGenerator, self).fwd(algo, ndim, dtype, precision)

    def gw(self, algo, ndim, dtype, precision):
        if algo == self.NONE:
            return self._gw_none(ndim)
        if algo == self.FFT_TILING:
            return self._gw_fft_tiling(ndim)
        return super(CuDNNV6ConvCaseGenerator, self).gw(algo, ndim, dtype, precision)

    def gi(self, algo, ndim, dtype, precision):
        if algo == self.NONE:
            return self._gi_none(ndim)
        return super(CuDNNV6ConvCaseGenerator, self).gi(algo, ndim, dtype, precision)


cudnn_conv_case_generator = (CuDNNV51ConvCaseGenerator() if cudnn.version < 6
                             else CuDNNV6ConvCaseGenerator())
```
```python
class BaseTestDnnConv(object):
    """
    Base class for exhaustive tests. Use its subclasses
    to run actual tests.
    """
    # Abstract attributes.

    ndim = 2

    fwd_algorithms = None
    bwd_filter_algorithms = None
    bwd_data_algorithms = None

    cpu_conv_class = None
    cpu_gradinput_class = None
    cpu_gradweight_class = None

    special_cases = []   # List of special ConvCases.
    runtime_shapes = []  # Tuple of tuples with format: n_times, (inputs_shape, filters_shape)

    # Utility methods.

    def _next_ten_exponent(self, val):
        # Return exponent for the next ten power that follows val.
        # val should be a positive integer.
        # Examples:
        # for 0 to 9, returns 1 (=> 10**1 == 10)
        # for 10 to 99, returns 2 (=> 10**2 == 100)
        ten_exponent = 1
        while val // 10 > 0:
            ten_exponent += 1
            val //= 10
        return ten_exponent

    def scale_numpy_arrays_inplace(self, A, B, alpha):
        scale_factor = 1
        # Scale down simultaneously A and B if alpha is not 1.
        if alpha != 1:
            scale_factor *= alpha
        # Normalize A and B simultaneously so that any values in these tensors are in interval [0, 1).
        max_a = math.floor(abs(A.max()))
        max_b = math.floor(abs(B.max()))
        if max_a or max_b:
            m_a = self._next_ten_exponent(max_a)
            m_b = self._next_ten_exponent(max_b)
            max_m = max(m_a, m_b)
            scale_factor *= 10 ** max_m
        if scale_factor != 1:
            A /= scale_factor
            B /= scale_factor
```
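The two helpers above keep the compared arrays in a common, bounded range: the scale factor combines `alpha` with the next power of ten above the larger floored maximum. A worked example of the arithmetic (values chosen for illustration):

```python
import math
import numpy as np

A = np.array([42.7, 1.0])   # e.g. a CPU reference result
B = np.array([3.1, 0.5])    # e.g. the GPU result
alpha = 2

max_a, max_b = math.floor(abs(A.max())), math.floor(abs(B.max()))  # 42, 3
# _next_ten_exponent(42) == 2 and _next_ten_exponent(3) == 1, so max_m == 2.
scale_factor = alpha * 10 ** 2  # alpha != 1 contributes a factor of 2 -> 200
A /= scale_factor
B /= scale_factor
```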
```python
    def get_atol_rtol(self, algo, dtype, precision):
        if dtype == 'float16':
            # Raise tolerance for float16.
            return (5e-2, 5e-2)
        if algo == 'winograd_non_fused' and dtype == precision == 'float32':
            # Raise tolerance for winograd_non_fused in FLOAT_CONFIG.
            return (1e-4, 1e-4)
        return None, None

    def __init__(self):
        utt.seed_rng(1234)
        self.dtype_configs = cudnn.get_supported_dtype_configs(check_dtype_config_support)

    def array_like_conv_output(self, inputs_shape, filters_shape, border_mode, subsample,
                               dilation, dtype):
        # Return a random array with inferred convolution output shape.
        out_shp = get_conv_output_shape(inputs_shape, filters_shape, border_mode, subsample,
                                        dilation)
        out_shp = assert_conv_shape(out_shp)
        return np.random.random(out_shp).astype(dtype)

    def run_conv_fwd(self, algo, dtype, precision, parameters):
        (inputs_shape, filters_shape, subsample, dilation,
         border_mode, conv_mode, alpha, beta) = parameters

        inputs_val = np.random.random(inputs_shape).astype(dtype)
        filters_val = np.random.random(filters_shape).astype(dtype)

        # Scale down the input values to prevent very large absolute errors
        # due to float rounding.
        inputs_val /= 10
        filters_val /= 10

        inputs = theano.shared(inputs_val)
        filters = theano.shared(filters_val)

        if beta == 0:
            out = None
        else:
            out = self.array_like_conv_output(inputs_shape, filters_shape, border_mode,
                                              subsample, dilation, dtype)
            out /= 10
        # Compile a theano function for the cuDNN implementation.
        conv = dnn_conv(img=inputs, kerns=filters, alpha=alpha, beta=beta, out=out,
                        border_mode=border_mode, subsample=subsample, dilation=dilation,
                        conv_mode=conv_mode, algo=algo, precision=precision)
        f = theano.function([], conv, mode=mode_with_gpu)

        # If conv_mode is 'conv' the reference implementation should use
        # filters flipped according to the width, height and time axis.
        if conv_mode == 'conv':
            if inputs.ndim == 5:
                flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
            else:
                flipped_filters = filters[:, :, ::-1, ::-1]
        else:
            flipped_filters = filters

        # Compile a theano function for the reference implementation.
        conv_ref = self.cpu_conv_class(border_mode=border_mode,
                                       subsample=subsample,
                                       filter_dilation=dilation)(ref_cast(inputs), flipped_filters)
        f_ref = theano.function([], conv_ref, mode="FAST_RUN")

        # Compare the results of the two implementations.
        res_ref = f_ref()
        res = np.asarray(f())
        if algo in cudnn.deterministic_fwd_algorithms:
            utt.assert_allclose(res, np.asarray(f()))

        atol, rtol = self.get_atol_rtol(algo, dtype, precision)
        if beta == 0:
            cpu_res = alpha * res_ref
        else:
            cpu_res = alpha * res_ref + beta * out
        self.scale_numpy_arrays_inplace(cpu_res, res, alpha)
        utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)

    def run_conv_gradinput(self, algo, dtype, precision, parameters):
        (inputs_shape, filters_shape, subsample, dilation,
         border_mode, conv_mode, alpha, beta) = parameters

        if beta == 0:
            inputs_val = None
        else:
            inputs_val = np.random.random(inputs_shape).astype(dtype)
            inputs_val /= 10
        filters_val = np.random.random(filters_shape).astype(dtype)
        topgrad_val = self.array_like_conv_output(inputs_shape, filters_shape, border_mode,
                                                  subsample, dilation, dtype)

        # Scale down the input values to prevent absolute errors in utt.assert_allclose.
        filters_val /= 10
        topgrad_val /= 10

        filters = theano.shared(filters_val)
        topgrad = theano.shared(topgrad_val)

        # Compile a theano function for the cuDNN implementation.
        grad_i = dnn_gradinput(filters, topgrad, inputs_shape, alpha=alpha, beta=beta,
                               out=inputs_val, border_mode=border_mode, subsample=subsample,
                               dilation=dilation, conv_mode=conv_mode, algo=algo,
                               precision=precision)
        f = theano.function([], grad_i, mode=mode_with_gpu)

        # If conv_mode is 'conv' the reference implementation should use
        # filters flipped according to the width, height and time axis.
        if conv_mode == 'conv':
            if filters.ndim == 5:
                flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
            else:
                flipped_filters = filters[:, :, ::-1, ::-1]
        else:
            flipped_filters = filters

        # Compile a theano function for the reference implementation.
        grad_i_ref = self.cpu_gradinput_class(border_mode=border_mode,
                                              subsample=subsample,
                                              filter_dilation=dilation)(ref_cast(flipped_filters),
                                                                        ref_cast(topgrad),
                                                                        inputs_shape[2:])
        f_ref = theano.function([], grad_i_ref, mode="FAST_RUN")

        # Compare the results of the two implementations.
        res_ref = f_ref()
        res = np.asarray(f())
        if algo in cudnn.deterministic_bwd_data_algorithms:
            utt.assert_allclose(res, np.asarray(f()))

        atol, rtol = self.get_atol_rtol(algo, dtype, precision)
        if beta == 0:
            cpu_res = alpha * res_ref
        else:
            cpu_res = alpha * res_ref + beta * inputs_val
        self.scale_numpy_arrays_inplace(cpu_res, res, alpha)
        utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)

    def run_conv_gradweight(self, algo, dtype, precision, parameters):
        (inputs_shape, filters_shape, subsample, dilation,
         border_mode, conv_mode, alpha, beta) = parameters

        inputs_val = np.random.random(inputs_shape).astype(dtype)
        if beta == 0:
            filters_val = None
        else:
            filters_val = np.random.random(filters_shape).astype(dtype)
            filters_val /= 10
        topgrad_val = self.array_like_conv_output(inputs_shape, filters_shape, border_mode,
                                                  subsample, dilation, dtype)

        # Scale down the input values to prevent absolute errors in utt.assert_allclose.
        inputs_val /= 10
        topgrad_val /= 10

        inputs = theano.shared(inputs_val)
        topgrad = theano.shared(topgrad_val)

        # Compile a theano function for the cuDNN implementation.
        grad_w = dnn_gradweight(inputs, topgrad, filters_shape, alpha=alpha, beta=beta,
                                out=filters_val, border_mode=border_mode, subsample=subsample,
                                dilation=dilation, conv_mode=conv_mode, algo=algo,
                                precision=precision)
        f = theano.function([], grad_w, mode=mode_with_gpu)

        # Compile a theano function for the reference implementation.
        grad_w_ref = self.cpu_gradweight_class(border_mode=border_mode,
                                               subsample=subsample,
                                               filter_dilation=dilation)(ref_cast(inputs),
                                                                         ref_cast(topgrad),
                                                                         filters_shape[2:])
        if conv_mode == 'conv':
            if inputs.ndim == 5:
                grad_w_ref = grad_w_ref[:, :, ::-1, ::-1, ::-1]
            else:
                grad_w_ref = grad_w_ref[:, :, ::-1, ::-1]
        f_ref = theano.function([], grad_w_ref, mode="FAST_RUN")

        # Compare the results of the two implementations.
        res_ref = f_ref()
        res = np.asarray(f())
        if algo in cudnn.deterministic_bwd_filter_algorithms:
            utt.assert_allclose(res, np.asarray(f()))

        atol, rtol = self.get_atol_rtol(algo, dtype, precision)
        if beta == 0:
            cpu_res = alpha * res_ref
        else:
            cpu_res = alpha * res_ref + beta * filters_val
        self.scale_numpy_arrays_inplace(cpu_res, res, alpha)
        utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)

    def should_fail(self, function, *args):
        try:
            print('(should fail)', file=sys.stderr, end=' ')
            function(*args)
        except Exception:
            pass
        else:
            raise AssertionError('Should fail', function.__name__, *args)

    def should_fail_fwd(self, *args):
        self.should_fail(self.run_conv_fwd, *args)

    def should_fail_gradinput(self, *args):
        self.should_fail(self.run_conv_gradinput, *args)

    def should_fail_gradweight(self, *args):
        self.should_fail(self.run_conv_gradweight, *args)

    def get_expected_tcount(self):
        """
        Utility function to get the expected test count
        without actually running nosetests.
        """
        return (sum(1 for t in self.test_fwd()) +
                sum(1 for t in self.test_gradweight()) +
                sum(1 for t in self.test_gradinput()) +
                sum(1 for t in self.test_fwd_runtime_algorithms()) +
                sum(1 for t in self.test_gradweight_runtime_algorithms()) +
                sum(1 for t in self.test_gradinput_runtime_algorithms()))

    # Iterable test methods.

    def test_fwd(self):
        for dtype, precision in self.dtype_configs:
            algos = [algo for algo in self.fwd_algorithms
                     if cudnn.fwd_algo_supports_dtype_config(algo, dtype, precision, self.ndim)]
            for algo in algos:
                for parameters in cudnn_conv_case_generator.fwd(algo, self.ndim,
                                                                dtype, precision).get_cases():
                    yield (self.run_conv_fwd, algo, dtype, precision, parameters)
            if algos:
                # Some algorithms support the current data type configuration for the current
                # ndim, so an algorithm could be chosen at runtime.
                for algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
                    for parameters in cudnn_conv_case_generator.fwd(algo, self.ndim,
                                                                    dtype, precision).get_cases():
                        yield (self.run_conv_fwd, algo, dtype, precision, parameters)
        for dnn_case in self.special_cases:
            if dnn_case.is_fwd():
                if dnn_case.should_fail:
                    yield (self.should_fail_fwd,) + dnn_case.get_case()
                else:
                    yield (self.run_conv_fwd,) + dnn_case.get_case()

    def test_gradinput(self):
        for dtype, precision in self.dtype_configs:
            algos = [algo for algo in self.bwd_data_algorithms
                     if cudnn.bwd_data_algo_supports_dtype_config(algo, dtype, precision,
                                                                  self.ndim)]
            for algo in algos:
                for parameters in cudnn_conv_case_generator.gi(algo, self.ndim,
                                                               dtype, precision).get_cases():
                    yield (self.run_conv_gradinput, algo, dtype, precision, parameters)
            if algos:
                # Some algorithms support the current data type configuration for the current
                # ndim, so an algorithm could be chosen at runtime.
                for algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
                    for parameters in cudnn_conv_case_generator.gi(algo, self.ndim,
                                                                   dtype, precision).get_cases():
                        yield (self.run_conv_gradinput, algo, dtype, precision, parameters)
        for dnn_case in self.special_cases:
            if dnn_case.is_bwd_data():
                if dnn_case.should_fail:
                    yield (self.should_fail_gradinput,) + dnn_case.get_case()
                else:
                    yield (self.run_conv_gradinput,) + dnn_case.get_case()

    def test_gradweight(self):
        for dtype, precision in self.dtype_configs:
            algos = [algo for algo in self.bwd_filter_algorithms
                     if cudnn.bwd_filter_algo_supports_dtype_config(algo, dtype, precision,
                                                                    self.ndim)]
            for algo in algos:
                for parameters in cudnn_conv_case_generator.gw(algo, self.ndim,
                                                               dtype, precision).get_cases():
                    yield (self.run_conv_gradweight, algo, dtype, precision, parameters)
            if algos:
                # Some algorithms support the current data type configuration for the current
                # ndim, so an algorithm could be chosen at runtime.
                for algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
                    for parameters in cudnn_conv_case_generator.gw(algo, self.ndim,
                                                                   dtype, precision).get_cases():
                        yield (self.run_conv_gradweight, algo, dtype, precision, parameters)
        for dnn_case in self.special_cases:
            if dnn_case.is_bwd_filter():
                if dnn_case.should_fail:
                    yield (self.should_fail_gradweight,) + dnn_case.get_case()
                else:
                    yield (self.run_conv_gradweight,) + dnn_case.get_case()

    # The 3 following tests are intended to be run with the theano flag `cmodule.debug=True`.
    # The output should then be analyzed to check if runtime algorithms are
    # reused, reloaded from cache or updated, depending on what we expect from
    # the current dnn_fwd/dnn_gi/dnn_gw codes. I currently don't know a better way
    # to efficiently test the implemented cuDNN convolution caches.

    def test_fwd_runtime_algorithms(self):
        dtype = 'float32'
        unit_shape = (1,) * self.ndim
        _broadcastable = [False] * (2 + self.ndim)

        def run_fwd_runtime_algorithm(algo):
            inputs = theano.tensor.TensorType(dtype, _broadcastable)()
            filters = theano.tensor.TensorType(dtype, _broadcastable)()
            # Scale down the input values to prevent very large absolute errors
            # due to float rounding.
            lower_inputs = inputs / 10
            lower_filters = filters / 10
            conv = dnn_conv(img=lower_inputs, kerns=lower_filters, algo=algo, precision=dtype,
                            subsample=unit_shape, dilation=unit_shape)
            f = theano.function([inputs, filters], conv, mode=mode_with_gpu)
            if self.ndim == 3:
                flipped_filters = lower_filters[:, :, ::-1, ::-1, ::-1]
            else:
                flipped_filters = lower_filters[:, :, ::-1, ::-1]
            conv_ref = self.cpu_conv_class(subsample=unit_shape)(ref_cast(lower_inputs),
                                                                 flipped_filters)
            f_ref = theano.function([inputs, filters], conv_ref, mode='FAST_RUN')
            runtime_shapes = self.runtime_shapes
            if algo in ('time_once', 'guess_once'):
                runtime_shapes = [list(runtime_shapes[0])]
                runtime_shapes[0][0] = 5
            for ntimes, (inputs_shape, filters_shape) in runtime_shapes:
                print('Shapes:', inputs_shape, filters_shape)
                for i in range(ntimes):
                    inputs_val = np.random.random(inputs_shape).astype(dtype)
                    filters_val = np.random.random(filters_shape).astype(dtype)
                    gpu_res = np.asarray(f(inputs_val, filters_val))
                    cpu_res = f_ref(inputs_val, filters_val)
                    self.scale_numpy_arrays_inplace(cpu_res, gpu_res, 1)
                    utt.assert_allclose(cpu_res, gpu_res)

        for algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
            yield (run_fwd_runtime_algorithm, algo)

    def test_gradinput_runtime_algorithms(self):
        dtype = 'float32'
        unit_shape = (1,) * self.ndim
        _broadcastable = [False] * (2 + self.ndim)

        def run_gradinput_runtime_algorithm(algo):
            theano.config.dnn.conv.algo_bwd_data = algo
            inputs = theano.tensor.TensorType(dtype, _broadcastable)()
            filters = theano.tensor.TensorType(dtype, _broadcastable)()
            conv = dnn_conv(img=inputs, kerns=filters, algo=algo, precision=dtype,
                            subsample=unit_shape, dilation=unit_shape)
            grad_i = theano.tensor.grad(conv.sum(), [inputs])
            f = theano.function([inputs, filters], grad_i, mode=mode_with_gpu)
            assert 1 == len([node for node in f.maker.fgraph.apply_nodes
                             if isinstance(node.op, GpuDnnConvGradI)])
            assert not any(isinstance(node.op, GpuDnnConv)
                           for node in f.maker.fgraph.apply_nodes)
            assert not any(isinstance(node.op, GpuDnnConvGradW)
                           for node in f.maker.fgraph.apply_nodes)
            if self.ndim == 3:
                flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
            else:
                flipped_filters = filters[:, :, ::-1, ::-1]
            conv_ref = self.cpu_conv_class(subsample=unit_shape)(ref_cast(inputs),
                                                                 flipped_filters)
            grad_i_ref = theano.tensor.grad(conv_ref.sum(), [inputs])
            f_ref = theano.function([inputs, filters], grad_i_ref, mode='FAST_RUN')
            runtime_shapes = self.runtime_shapes
            if algo in ('time_once', 'guess_once'):
                runtime_shapes = [list(runtime_shapes[0])]
                runtime_shapes[0][0] = 5
            for ntimes, (inputs_shape, filters_shape) in runtime_shapes:
                print('Shapes:', inputs_shape, filters_shape)
                for i in range(ntimes):
                    inputs_val = np.random.random(inputs_shape).astype(dtype)
                    filters_val = np.random.random(filters_shape).astype(dtype)
                    gpu_res = f(inputs_val, filters_val)
                    cpu_res = f_ref(inputs_val, filters_val)
                    utt.assert_allclose(cpu_res, np.asarray(gpu_res))

        for algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
            yield (run_gradinput_runtime_algorithm, algo)

    def test_gradweight_runtime_algorithms(self):
        dtype = 'float32'
        unit_shape = (1,) * self.ndim
        _broadcastable = [False] * (2 + self.ndim)

        def run_gradweight_runtime_algorithm(algo):
            theano.config.dnn.conv.algo_bwd_filter = algo
            inputs = theano.tensor.TensorType(dtype, _broadcastable)()
            filters = theano.tensor.TensorType(dtype, _broadcastable)()
            conv = dnn_conv(img=inputs, kerns=filters, algo=algo, precision=dtype,
                            subsample=unit_shape, dilation=unit_shape)
            grad_w = theano.tensor.grad(conv.sum(), [filters])
            f = theano.function([inputs, filters], grad_w, mode=mode_with_gpu)
            assert 1 == len([node for node in f.maker.fgraph.apply_nodes
                             if isinstance(node.op, GpuDnnConvGradW)])
            assert not any(isinstance(node.op, GpuDnnConv)
                           for node in f.maker.fgraph.apply_nodes)
            assert not any(isinstance(node.op, GpuDnnConvGradI)
                           for node in f.maker.fgraph.apply_nodes)
            if self.ndim == 3:
                flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
            else:
                flipped_filters = filters[:, :, ::-1, ::-1]
            conv_ref = self.cpu_conv_class(subsample=unit_shape)(ref_cast(inputs),
                                                                 flipped_filters)
            grad_w_ref = theano.tensor.grad(conv_ref.sum(), [filters])
            f_ref = theano.function([inputs, filters], grad_w_ref, mode='FAST_RUN')
            runtime_shapes = self.runtime_shapes
            if algo in ('time_once', 'guess_once'):
                runtime_shapes = [list(runtime_shapes[0])]
                runtime_shapes[0][0] = 5
            for ntimes, (inputs_shape, filters_shape) in runtime_shapes:
                print('Shapes:', inputs_shape, filters_shape)
                for i in range(ntimes):
                    inputs_val = np.random.random(inputs_shape).astype(dtype)
                    filters_val = np.random.random(filters_shape).astype(dtype)
                    gpu_res = f(inputs_val, filters_val)
                    cpu_res = f_ref(inputs_val, filters_val)
                    utt.assert_allclose(cpu_res, np.asarray(gpu_res))

        for algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
            yield (run_gradweight_runtime_algorithm, algo)
```
```python
class TestDnnConv2D(BaseTestDnnConv):
    ndim = 2

    fwd_algorithms = cudnn.cudnnConvolutionFwdAlgo_t.get_aliases()
    bwd_filter_algorithms = cudnn.cudnnConvolutionBwdFilterAlgo_t.get_aliases()
    bwd_data_algorithms = cudnn.cudnnConvolutionBwdDataAlgo_t.get_aliases()

    cpu_conv_class = CorrMM
    cpu_gradinput_class = CorrMM_gradInputs
    cpu_gradweight_class = CorrMM_gradWeights

    special_cases = [ConvCase.bwd_filter(algo='deterministic', dtype='float32', precision='float32',
                                         inputs_shape=(1, 1, 541211, 10),
                                         filters_shape=(50, 1, 3, 10),
                                         border_mode=(1, 0),
                                         should_fail=(cudnn.version <= 6)),
                     ConvCase.fwd(algo='small', dtype='float32', precision='float32',
                                  inputs_shape=(65536, 2, 2, 2), filters_shape=(1, 2, 2, 2)),
                     # NB: Due to the current workaround (see dnn_fwd.c), this test won't fail
                     # for cuDNN < v6100.
                     ConvCase.fwd(algo='small', dtype='float32', precision='float32',
                                  inputs_shape=(65537, 2, 2, 2), filters_shape=(1, 2, 2, 2))]

    runtime_shapes = [
        (3, [(2, 3, 10, 9), (5, 3, 7, 7)]),
        (1, [(1, 1, 100, 200), (1, 1, 50, 200)]),
        (1, [(4, 2, 20, 20), (2, 2, 20, 19)]),
        (3, [(2, 3, 10, 9), (5, 3, 7, 7)]),        # cache should be used
        (1, [(2, 2, 50, 50), (5, 2, 25, 31)]),
        (1, [(1, 1, 100, 200), (1, 1, 50, 200)]),  # cache should be used
        (1, [(4, 2, 20, 20), (2, 2, 20, 19)]),     # cache should be used
        (1, [(1, 2, 3, 4), (6, 2, 2, 1)])
    ]


class TestDnnConv3D(BaseTestDnnConv):
    ndim = 3

    fwd_algorithms = cudnn.conv3d_fwd_algorithms
    bwd_filter_algorithms = cudnn.conv3d_bwd_filter_algorithms
    bwd_data_algorithms = cudnn.conv3d_bwd_data_algorithms

    cpu_conv_class = Corr3dMM
    cpu_gradinput_class = Corr3dMM_gradInputs
    cpu_gradweight_class = Corr3dMM_gradWeights

    special_cases = [ConvCase.fwd(algo='small', dtype='float32', precision='float32',
                                  inputs_shape=(65536, 2, 2, 2, 2),
                                  filters_shape=(1, 2, 2, 2, 2)),
                     # NB: Due to the current workaround (see dnn_fwd.c), this test won't fail
                     # for cuDNN < v6100.
                     ConvCase.fwd(algo='small', dtype='float32', precision='float32',
                                  inputs_shape=(65537, 2, 2, 2, 2),
                                  filters_shape=(1, 2, 2, 2, 2))]

    runtime_shapes = [
        (3, [(2, 3, 5, 10, 9), (5, 3, 4, 7, 7)]),
        (1, [(1, 1, 5, 100, 200), (1, 1, 4, 50, 200)]),
        (1, [(4, 2, 20, 20, 20), (2, 2, 20, 19, 18)]),
        (3, [(2, 3, 5, 10, 9), (5, 3, 4, 7, 7)]),        # cache should be used
        (1, [(2, 2, 50, 50, 5), (5, 2, 25, 31, 4)]),
        (1, [(1, 1, 5, 100, 200), (1, 1, 4, 50, 200)]),  # cache should be used
        (1, [(4, 2, 20, 20, 20), (2, 2, 20, 19, 18)]),   # cache should be used
        (1, [(1, 2, 3, 4, 5), (6, 2, 3, 2, 1)])
    ]
```
def test_true_half_config_support():
    # For cuDNN V5.1 and V6.0:
    # "TRUE_HALF_CONFIG is only supported on architectures with true fp16 support (compute capability 5.3 and 6.0)"
    if not check_dtype_config_support('float16', 'float16'):
        raise SkipTest('FWD: TRUE_HALF_CONFIG not supported on this GPU.')
class CheckDnn:
    """
    Utility functions for scripting and info printing.
    """

    @staticmethod
    def dtype_config_to_str(dtype_config):
        dtype, precision = dtype_config
        if dtype == precision == 'float16':
            return 'TRUE_HALF_CONFIG'
        if dtype == 'float16' and precision == 'float32':
            return 'PSEUDO_HALF_CONFIG'
        if dtype == precision == 'float32':
            return 'FLOAT_CONFIG'
        if dtype == precision == 'float64':
            return 'DOUBLE_CONFIG'
        raise ValueError('unknown data type configuration', dtype_config)
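A brief usage sketch of this helper, with inputs taken from the four configurations it recognizes:

    CheckDnn.dtype_config_to_str(('float16', 'float32'))  # -> 'PSEUDO_HALF_CONFIG'
    CheckDnn.dtype_config_to_str(('float64', 'float64'))  # -> 'DOUBLE_CONFIG'
    CheckDnn.dtype_config_to_str(('float64', 'float16'))  # raises ValueError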
    @staticmethod
    def print_infos(count_tests=True):
        # Print info about tests and cuDNN supported algorithms and configurations.
        test_2d = TestDnnConv2D()
        test_3d = TestDnnConv3D()
        print()
        print('Available data type configurations:',
              ', '.join(CheckDnn.dtype_config_to_str(d)
                        for d in cudnn.get_supported_dtype_configs(check_dtype_config_support)))
        print()
        print('2D algorithms:')
        print('FWD        :', ', '.join(test_2d.fwd_algorithms))
        print('BWD FILTER :', ', '.join(test_2d.bwd_filter_algorithms))
        print('BWD DATA   :', ', '.join(test_2d.bwd_data_algorithms))
        print()
        print('3D algorithms:')
        print('FWD        :', ', '.join(test_3d.fwd_algorithms))
        print('BWD FILTER :', ', '.join(test_3d.bwd_filter_algorithms))
        print('BWD DATA   :', ', '.join(test_3d.bwd_data_algorithms))
        print()
        if count_tests:
            count_tests_2d = test_2d.get_expected_tcount()
            count_tests_3d = test_3d.get_expected_tcount()
            print(count_tests_2d, 'conv2D test cases.')
            print(count_tests_3d, 'conv3D test cases.')
            print('1 supplementary test.')
            print(count_tests_2d + count_tests_3d + 1, 'total conv tests.')
            print()
    @staticmethod
    def print_tests():
        # Print test cases without running them.
        for test in (TestDnnConv2D(), TestDnnConv3D()):
            for tcase in test.test_fwd():
                print(tcase[0].__name__, *tcase[1:])
            for tcase in test.test_gradinput():
                print(tcase[0].__name__, *tcase[1:])
            for tcase in test.test_gradweight():
                print(tcase[0].__name__, *tcase[1:])
            for tcase in test.test_fwd_runtime_algorithms():
                print(tcase[0].__name__, *tcase[1:])
            for tcase in test.test_gradinput_runtime_algorithms():
                print(tcase[0].__name__, *tcase[1:])
            for tcase in test.test_gradweight_runtime_algorithms():
                print(tcase[0].__name__, *tcase[1:])
        print(test_true_half_config_support.__name__)
if __name__ == '__main__':
    args = sys.argv[1:]
    if len(args) == 1 and args[0] in ('infos', 'list'):
        if args[0] == 'infos':
            CheckDnn.print_infos()
        if args[0] == 'list':
            CheckDnn.print_tests()
    else:
        # We run all tests with nosetests.
        module_name = sys.modules[__name__].__file__
        if len(args) == 0:
            # No args given: run nosetests -vs.
            args = ['--verbose', '--nocapture']
        # Else, use given args.
        argv = [sys.argv[0], module_name] + args
        CheckDnn.print_infos()
        nose.main(argv=argv)
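Given this entry point, the script can be driven in three ways; a usage sketch (assuming it is invoked from its own directory):

    python check_dnn_conv.py infos  # print supported configurations and algorithms
    python check_dnn_conv.py list   # print every test case without running it
    python check_dnn_conv.py        # run all tests through nosetests --verbose --nocapture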
theano/gpuarray/tests/run_dnn_conv.py
0 → 100644
# This script allows you to run one specific cuDNN convolution test case.
# This script should not be imported, but only used as a program.
# python run_dnn_conv.py --help  # Print help.
# python run_dnn_conv.py {fwd|gradweight|gradinput} -a <algo> -i <inputShape> -f <filterShape> ...
from __future__ import absolute_import, print_function, division

import argparse
import sys

import theano
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_RUNTIME
from theano.gpuarray.cudnn_defs import (HALF, FLOAT, DOUBLE, TRUE_HALF_CONFIG,
                                        PSEUDO_HALF_CONFIG, FLOAT_CONFIG, DOUBLE_CONFIG)
from theano.gpuarray.tests.check_dnn_conv import cudnn, TestDnnConv2D, TestDnnConv3D, CheckDnn
from theano.tensor.nnet.abstract_conv import get_conv_output_shape
if __name__ != '__main__':
    raise ImportError('This script must not be imported.')
class TupleAction(argparse.Action):
    # Tuple extractor for command line args parser.
    def __call__(self, parser, namespace, values, option_string=None):
        values = tuple(int(v) for v in values.split(','))
        setattr(namespace, self.dest, values)


class BorderAction(TupleAction):
    # Border extractor for command line args parser.
    def __call__(self, parser, namespace, values, option_string=None):
        if values not in ('valid', 'full', 'half'):
            super(BorderAction, self).__call__(parser, namespace, values, option_string)
        else:
            setattr(namespace, self.dest, values)
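To illustrate the two actions with hypothetical command-line fragments:

    # TupleAction:  '-i 2,3,10,9' -> args.input_shape == (2, 3, 10, 9)
    # BorderAction: '-b half'     -> args.border_mode == 'half' (keyword kept as a string)
    # BorderAction: '-b 1,0'      -> args.border_mode == (1, 0) (falls back to TupleAction)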
args = sys.argv[1:]

computations = FWD, BWD_FILTER, BWD_DATA = ('fwd', 'gradweight', 'gradinput')
algorithms = (tuple(sorted(set(cudnn.cudnnConvolutionFwdAlgo_t.get_aliases() +
                               cudnn.cudnnConvolutionBwdFilterAlgo_t.get_aliases() +
                               cudnn.cudnnConvolutionBwdDataAlgo_t.get_aliases()))) +
              SUPPORTED_DNN_CONV_ALGO_RUNTIME)
types = (HALF, FLOAT, DOUBLE)
data_type_configurations = dict(TRUE_HALF_CONFIG=TRUE_HALF_CONFIG,
                                PSEUDO_HALF_CONFIG=PSEUDO_HALF_CONFIG,
                                FLOAT_CONFIG=FLOAT_CONFIG,
                                DOUBLE_CONFIG=DOUBLE_CONFIG)
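For reference, the resulting algorithms tuple merges the deduplicated cuDNN algorithm aliases with the runtime specifiers; its exact contents depend on the cuDNN version, but it looks roughly like:

    # ('deterministic', 'fft', 'fft_tiling', 'gemm', 'none', 'small', 'winograd', ...,
    #  'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change')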
parser = argparse.ArgumentParser()

parser.add_argument('computation', choices=computations,
                    help='Computation to run.')

parser.add_argument('-a', '--algo', choices=algorithms, required=True,
                    help='Algorithm to use for computation.')

parser.add_argument('-i', '--input-shape', action=TupleAction, required=True,
                    help='Input shape. Comma-separated list of integers (no spaces).')

parser.add_argument('-f', '--filter-shape', action=TupleAction, required=True,
                    help='Filter shape. Comma-separated list of integers (no spaces).')

parser.add_argument('-D', '--dtype-config', choices=list(sorted(data_type_configurations.keys())), default=None,
                    help='Data type configuration for (data type; precision). Default (theano floatX; theano floatX). '
                         'To specify data type configuration, you can either use this option or set data type and '
                         'precision separately with "-t" and "-p" options.')

parser.add_argument('-t', '--dtype', choices=types, default=None,
                    help='Data type (default theano floatX).')

parser.add_argument('-p', '--precision', choices=types, default=None,
                    help='Precision (default theano floatX).')

parser.add_argument('-s', '--subsample', action=TupleAction,
                    help='Subsample. Comma-separated list of integers (no spaces). Default: 1 per dimension.')

parser.add_argument('-d', '--dilation', action=TupleAction,
                    help='Dilation. Comma-separated list of integers (no spaces). Default: 1 per dimension.')

parser.add_argument('-b', '--border-mode', default='valid', action=BorderAction,
                    help='Border mode. "valid" (default), "full", "half" '
                         'or a comma-separated list of integers (no spaces).')

parser.add_argument('-c', '--conv-mode', choices=('conv', 'cross'), default='conv',
                    help='Conv mode (default: conv).')

parser.add_argument('-A', '--alpha', type=float, default=1,
                    help='alpha (floating), must not be zero. Default 1.')

parser.add_argument('-B', '--beta', type=float, default=0,
                    help='beta (floating). Default 0.')

parser.add_argument('-I', '--print-infos', action='store_true', default=False,
                    help='Print some info before testing.')

args = parser.parse_args(args)
test = args.computation

if len(args.input_shape) != len(args.filter_shape):
    raise ValueError('Expected same length for input shape and filter shape')
if len(args.input_shape) not in (4, 5):
    raise ValueError('Expected length 4 or 5 for input shape')

ndim = len(args.input_shape) - 2
if ndim == 2:
    tests = TestDnnConv2D()
elif ndim == 3:
    tests = TestDnnConv3D()
if args.subsample is None:
    args.subsample = (1,) * ndim
if args.dilation is None:
    args.dilation = (1,) * ndim

if not (ndim == len(args.subsample) == len(args.dilation)):
    raise ValueError('Expected parameters sized for %d dimensions.' % ndim)
if isinstance(args.border_mode, tuple) and ndim != len(args.border_mode):
    raise ValueError('Expected borders sized for %d dimensions.' % ndim)

if args.alpha == 0:
    raise ValueError('Nothing could be computed if alpha is 0.')
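The alpha and beta scalars follow the usual cuDNN scaling convention, which is why a zero alpha is rejected:

    # output = alpha * convolution(inputs, filters) + beta * previous_output
    # With alpha == 0 the convolution result would be discarded entirely.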
if args.dtype_config is None:
    if args.dtype is None:
        args.dtype = theano.config.floatX
    if args.precision is None:
        args.precision = theano.config.floatX
else:
    if args.dtype is not None or args.precision is not None:
        raise ValueError('You must specify either -D <data-type-configuration> '
                         'or (-t <data-type> -p <precision>), not both.')
    args.dtype, args.precision = data_type_configurations[args.dtype_config]

if (args.dtype, args.precision) not in cudnn.get_supported_dtype_configs():
    raise ValueError('Unsupported data type configuration %s %s.' % (args.dtype, args.precision))
if args.algo not in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
    check_config = False
    if test == FWD:
        check_config = cudnn.fwd_algo_supports_dtype_config(args.algo, args.dtype, args.precision, ndim)
    elif test == BWD_FILTER:
        check_config = cudnn.bwd_filter_algo_supports_dtype_config(args.algo, args.dtype, args.precision, ndim)
    elif test == BWD_DATA:
        check_config = cudnn.bwd_data_algo_supports_dtype_config(args.algo, args.dtype, args.precision, ndim)
    if not check_config:
        print('Warning: %s computation does not normally support configuration (%s, %s) for algo %s.' %
              (test, args.dtype, args.precision, args.algo), file=sys.stderr)
algo = args.algo
dtype = args.dtype
precision = args.precision
parameters = (args.input_shape, args.filter_shape, args.subsample, args.dilation,
              args.border_mode, args.conv_mode, args.alpha, args.beta)

if args.print_infos:
    CheckDnn.print_infos(count_tests=False)

print('======================')
print('Running', test, algo, dtype, precision, *parameters)

if test == FWD:
    tests.run_conv_fwd(algo, dtype, precision, parameters)
    expected_output_shape = get_conv_output_shape(args.input_shape, args.filter_shape,
                                                  args.border_mode, args.subsample, args.dilation)
elif test == BWD_FILTER:
    tests.run_conv_gradweight(algo, dtype, precision, parameters)
    expected_output_shape = args.filter_shape
elif test == BWD_DATA:
    tests.run_conv_gradinput(algo, dtype, precision, parameters)
    expected_output_shape = args.input_shape

print('Computed shape:', expected_output_shape)
print('... OK')
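A complete invocation sketch (the shapes and algorithm here are hypothetical examples, not values from the commit):

    python run_dnn_conv.py fwd -a small -i 1,2,8,8 -f 3,2,3,3 -b half -c cross
    # one forward convolution: batch 1, 2 channels, 8x8 images,
    # 3 filters of size 3x3, half padding, cross-correlation mode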
theano/gpuarray/tests/test_dnn.py
...
@@ -1215,7 +1215,7 @@ def test_conv3d_fwd():
     f = theano.function([], conv, mode=mode_with_gpu)
     # If conv_mode is 'conv' the reference implementation should use
-    # filters filpped according to the width, height and time axis
+    # filters flipped according to the width, height and time axis
     if conv_mode == 'conv':
         flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
     else:
...
@@ -1271,7 +1271,7 @@ def test_conv3d_bwd():
     f = theano.function([], [grad_i, grad_w], mode=mode_with_gpu)
     # If conv_mode is 'conv' the reference implementation should use
-    # filters filpped according to the width, height and time axis
+    # filters flipped according to the width, height and time axis
     if conv_mode == 'conv':
         flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
     else:
...