Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
1b6e6389
提交
1b6e6389
authored
6月 07, 2017
作者:
notoraptor
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Cancel some changes into CEnumType.
Cancel changes into dnn_fwd.c Heavy simplification of check_dnn. Make check_dnn runnable as a python script.
上级
6cc0c5ca
隐藏空白字符变更
内嵌
并排
正在显示
7 个修改的文件
包含
271 行增加
和
1188 行删除
+271
-1188
type.py
theano/gof/type.py
+1
-25
cudnn_defs.py
theano/gpuarray/cudnn_defs.py
+118
-13
check_dnn.py
theano/gpuarray/tests/check_dnn.py
+152
-262
check_dnn_doc.py
theano/gpuarray/tests/check_dnn_doc.py
+0
-284
dnn_choose_fwd.c
theano/gpuarray/tests/dnn_choose_fwd.c
+0
-212
dnn_choose_gi.c
theano/gpuarray/tests/dnn_choose_gi.c
+0
-202
dnn_choose_gw.c
theano/gpuarray/tests/dnn_choose_gw.c
+0
-190
没有找到文件。
theano/gof/type.py
浏览文件 @
1b6e6389
...
@@ -909,11 +909,7 @@ class EnumType(Type, dict):
...
@@ -909,11 +909,7 @@ class EnumType(Type, dict):
.. note::
.. note::
:class:`EnumType` is not complete and should never be used for regular graph operations.
This Type (and subclasses) is not complete and should never be used for regular graph operations.
:class:`EnumList` is not complete and should never be used for regular graph operations.
**:class:`CEnumType` is complete.**
"""
"""
...
@@ -1053,9 +1049,6 @@ class EnumType(Type, dict):
...
@@ -1053,9 +1049,6 @@ class EnumType(Type, dict):
#ifndef PyInt_AsLong
#ifndef PyInt_AsLong
#define PyInt_AsLong PyLong_AsLong
#define PyInt_AsLong PyLong_AsLong
#endif
#endif
#ifndef PyInt_FromLong
#define PyInt_FromLong PyLong_FromLong
#endif
#endif
#endif
"""
"""
...
@@ -1248,22 +1241,5 @@ class CEnumType(EnumList):
...
@@ -1248,22 +1241,5 @@ class CEnumType(EnumList):
"""
%
dict
(
i
=
i
,
name
=
name
,
constant_cname
=
swapped_dict
[
i
])
for
i
in
sorted
(
swapped_dict
.
keys
())),
"""
%
dict
(
i
=
i
,
name
=
name
,
constant_cname
=
swapped_dict
[
i
])
for
i
in
sorted
(
swapped_dict
.
keys
())),
fail
=
sub
[
'fail'
])
fail
=
sub
[
'fail'
])
def
c_sync
(
self
,
name
,
sub
):
return
"""
int py_value = -1;
Py_XDECREF(py_
%(name)
s);
/* We assume that ctype is an integer type usable in a switch. */
switch (
%(name)
s) {
%(cases)
s
default:
PyErr_SetString(PyExc_ValueError, "CEnumType: cannot map C value to Python constant.");
{
%(fail)
s}
break;
}
py_
%(name)
s = PyInt_FromLong(py_value);
"""
%
dict
(
name
=
name
,
fail
=
sub
[
'fail'
],
cases
=
''
.
join
(
"""
case
%(constant_cname)
s: py_value =
%(constant_pyvalue)
d; break;
"""
%
dict
(
constant_cname
=
k
,
constant_pyvalue
=
v
)
for
k
,
v
in
sorted
(
self
.
items
(),
key
=
lambda
t
:
t
[
1
])))
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
1
,
super
(
CEnumType
,
self
)
.
c_code_cache_version
())
return
(
1
,
super
(
CEnumType
,
self
)
.
c_code_cache_version
())
theano/gpuarray/cudnn_defs.py
浏览文件 @
1b6e6389
...
@@ -19,8 +19,27 @@ from __future__ import absolute_import, print_function, division
...
@@ -19,8 +19,27 @@ from __future__ import absolute_import, print_function, division
from
theano.gof
import
CEnumType
from
theano.gof
import
CEnumType
HALF
,
FLOAT
,
DOUBLE
=
(
'float16'
,
'float32'
,
'float64'
)
HALF
,
FLOAT
,
DOUBLE
=
(
'float16'
,
'float32'
,
'float64'
)
TRUE_HALF_CONFIG
=
(
HALF
,
HALF
)
PSEUDO_HALF_CONFIG
=
(
HALF
,
FLOAT
)
FLOAT_CONFIG
=
(
FLOAT
,
FLOAT
)
DOUBLE_CONFIG
=
(
DOUBLE
,
DOUBLE
)
def
is_true_half_config
(
dtype
,
precision
):
return
dtype
==
precision
==
HALF
def
is_pseudo_half_config
(
dtype
,
precision
):
return
dtype
==
HALF
and
precision
==
FLOAT
def
is_float_config
(
dtype
,
precision
):
return
dtype
==
precision
==
FLOAT
def
is_double_config
(
dtype
,
precision
):
return
dtype
==
precision
==
DOUBLE
# NB: Some cuDNN algorithms are listed in cuDNN enums but not implemented.
# NB: Some cuDNN algorithms are listed in cuDNN enums but not implemented.
...
@@ -103,22 +122,97 @@ class CuDNNV51(object):
...
@@ -103,22 +122,97 @@ class CuDNNV51(object):
# empty list of enum to don't crash with cudnn 5
# empty list of enum to don't crash with cudnn 5
cudnnReduceTensorOp_t
=
CEnumType
()
cudnnReduceTensorOp_t
=
CEnumType
()
def
supported_precisions
(
self
,
dtype
):
def
get_supported_dtype_configs
(
self
):
"""
"""
Return the tuple of
precisions supported by cuDNN for given input data type
.
Return the tuple of
data type configurations supported by this version of cuDNN
.
This is currently convenient for both cuDNN V5.1 and V6, as Theano does not
This is currently convenient for both cuDNN V5.1 and V6, as Theano does not
yet support new data types (like INT8, INT8x4, etc.).
yet support new data types (like INT8, INT8x4, etc.).
"""
"""
assert
dtype
in
(
HALF
,
FLOAT
,
DOUBLE
)
return
(
TRUE_HALF_CONFIG
,
PSEUDO_HALF_CONFIG
,
FLOAT_CONFIG
,
DOUBLE_CONFIG
)
if
dtype
==
HALF
:
# TRUE_HALF_CONFIG, PSEUDO_HALF_CONFIG
def
get_fwd_dtype_configs
(
self
,
check_runtime
=
None
):
return
(
HALF
,
FLOAT
)
# NB: "TRUE_HALF_CONFIG is only supported on architectures with true fp16 support
if
dtype
==
FLOAT
:
# (compute capability 5.3 and 6.0)". Can be checked at runtime only.
# FLOAT_CONFIG
if
check_runtime
is
None
or
check_runtime
(
*
TRUE_HALF_CONFIG
):
return
(
FLOAT
,)
return
self
.
get_supported_dtype_configs
()
if
dtype
==
DOUBLE
:
return
(
PSEUDO_HALF_CONFIG
,
FLOAT_CONFIG
,
DOUBLE_CONFIG
)
# DOUBLE_CONFIG
return
(
DOUBLE
,)
def
get_bwd_filter_dtype_configs
(
self
,
check_runtime
=
None
):
return
self
.
get_supported_dtype_configs
()
def
get_bwd_data_dtype_configs
(
self
,
check_runtime
=
None
):
return
self
.
get_supported_dtype_configs
()
def
fwd_algo_supports_dtype_config
(
self
,
algo
,
dtype
,
precision
,
ndim
):
algorithms
=
self
.
cudnnConvolutionFwdAlgo_t
algo
=
algorithms
.
fromalias
(
algo
)
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
:
return
not
is_true_half_config
(
dtype
,
precision
)
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
:
return
ndim
==
2
or
not
is_true_half_config
(
dtype
,
precision
)
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_FWD_ALGO_GEMM
:
return
ndim
==
2
and
not
is_true_half_config
(
dtype
,
precision
)
# CUDNN_CONVOLUTION_FWD_ALGO_DIRECT: not implemented.
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_FWD_ALGO_FFT
:
return
ndim
==
2
and
(
is_pseudo_half_config
(
dtype
,
precision
)
or
is_float_config
(
dtype
,
precision
))
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
:
if
ndim
==
2
:
return
is_pseudo_half_config
(
dtype
,
precision
)
or
is_float_config
(
dtype
,
precision
)
# NB: For cuDNN V6:
# " Data Type Config Support: PSEUDO_HALF_CONFIG, FLOAT_CONFIG
# (DOUBLE_CONFIG is also supported when the task can be handled by 1D FFT,
# ie, one of the filter dimension, width or height is 1)"
# Could be checked only when being in C code.
if
ndim
==
3
:
return
not
is_true_half_config
(
dtype
,
precision
)
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD
:
return
ndim
==
2
and
(
is_pseudo_half_config
(
dtype
,
precision
)
or
is_float_config
(
dtype
,
precision
))
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED
:
# NB: "If wDesc 's filter (height, width) is (5,5), data type config TRUE_HALF_CONFIG is not supported".
# We could not check it before being in C code.
return
ndim
==
2
and
not
is_double_config
(
dtype
,
precision
)
return
False
def
bwd_filter_algo_supports_dtype_config
(
self
,
algo
,
dtype
,
precision
,
ndim
):
algorithms
=
self
.
cudnnConvolutionBwdFilterAlgo_t
algo
=
algorithms
.
fromalias
(
algo
)
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0
:
return
not
is_true_half_config
(
dtype
,
precision
)
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1
:
return
ndim
==
2
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT
:
return
ndim
==
2
and
(
is_pseudo_half_config
(
dtype
,
precision
)
or
is_float_config
(
dtype
,
precision
))
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3
:
return
not
is_true_half_config
(
dtype
,
precision
)
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED
:
# NB: "If wDesc 's filter (height, width) is (5,5), data type config TRUE_HALF_CONFIG is not supported".
# We could not check it before being in C code.
return
ndim
==
2
and
not
is_double_config
(
dtype
,
precision
)
return
False
def
bwd_data_algo_supports_dtype_config
(
self
,
algo
,
dtype
,
precision
,
ndim
):
algorithms
=
self
.
cudnnConvolutionBwdDataAlgo_t
algo
=
algorithms
.
fromalias
(
algo
)
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_BWD_DATA_ALGO_0
:
return
not
is_true_half_config
(
dtype
,
precision
)
# CUDNN_CONVOLUTION_BWD_DATA_ALGO_1: all data type configs supported.
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT
:
return
ndim
==
2
and
(
is_pseudo_half_config
(
dtype
,
precision
)
or
is_float_config
(
dtype
,
precision
))
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING
:
if
ndim
==
2
:
return
is_pseudo_half_config
(
dtype
,
precision
)
or
is_float_config
(
dtype
,
precision
)
# NB: For cuDNN V6: "(DOUBLE_CONFIG is also supported when the task can be handled by 1D FFT,
# ie, one of the filter dimension, width or height is 1)"
# Could be checked only when being in C code.
if
ndim
==
3
:
return
not
is_true_half_config
(
dtype
,
precision
)
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD
:
return
ndim
==
2
and
is_pseudo_half_config
(
dtype
,
precision
)
or
is_float_config
(
dtype
,
precision
)
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED
:
# NB: "If wDesc 's filter (height, width) is (5,5), data type config TRUE_HALF_CONFIG is not supported".
# We could not check it before being in C code.
return
ndim
==
2
and
not
is_double_config
(
dtype
,
precision
)
return
False
class
CuDNNV6
(
CuDNNV51
):
class
CuDNNV6
(
CuDNNV51
):
...
@@ -162,6 +256,17 @@ class CuDNNV6(CuDNNV51):
...
@@ -162,6 +256,17 @@ class CuDNNV6(CuDNNV51):
(
'CUDNN_REDUCE_TENSOR_NORM2'
,
'norm2'
),
(
'CUDNN_REDUCE_TENSOR_NORM2'
,
'norm2'
),
ctype
=
'cudnnReduceTensorOp_t'
)
ctype
=
'cudnnReduceTensorOp_t'
)
def
bwd_filter_algo_supports_dtype_config
(
self
,
algo
,
dtype
,
precision
,
ndim
):
is_supported
=
super
(
CuDNNV6
,
self
)
.
bwd_filter_algo_supports_dtype_config
(
algo
,
dtype
,
precision
,
ndim
)
if
not
is_supported
:
algorithms
=
self
.
cudnnConvolutionBwdFilterAlgo_t
algo
=
algorithms
.
fromalias
(
algo
)
if
algo
==
algorithms
.
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING
:
return
ndim
==
2
and
(
is_pseudo_half_config
(
dtype
,
precision
)
or
is_float_config
(
dtype
,
precision
)
or
is_double_config
(
dtype
,
precision
))
return
is_supported
class
CuDNNV7
(
CuDNNV6
):
class
CuDNNV7
(
CuDNNV6
):
version
=
7
version
=
7
...
...
theano/gpuarray/tests/check_dnn.py
浏览文件 @
1b6e6389
#!/usr/bin/env python
# You can pass nosetests args when running this script. Examples:
# python theano/gpuarray/tests/check_dnn.py # Normal mode.
# python theano/gpuarray/tests/check_dnn.py -xvs # Verbose mode, capture output, exit at first error.
from
__future__
import
absolute_import
,
print_function
,
division
from
__future__
import
absolute_import
,
print_function
,
division
from
itertools
import
ifilter
,
product
from
itertools
import
ifilter
,
product
,
chain
import
nose
import
numpy
as
np
import
numpy
as
np
from
nose.plugins.skip
import
SkipTest
import
theano
import
theano
import
theano.tests.unittest_tools
as
utt
import
theano.tests.unittest_tools
as
utt
from
theano.compile.ops
import
shape_i_op
from
theano.compile.ops
import
shape_i_op
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_RUNTIME
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_RUNTIME
from
theano.gof
import
COp
,
Apply
,
ParamsType
from
theano.gof.type
import
CDataType
from
theano.gpuarray
import
cudnn_defs
from
theano.gpuarray
import
cudnn_defs
from
theano.gpuarray.basic_ops
import
infer_context_name
,
as_gpuarray_variable
,
gpu_contiguous
,
GpuAllocEmpty
from
theano.gpuarray.basic_ops
import
infer_context_name
,
as_gpuarray_variable
,
gpu_contiguous
,
GpuAllocEmpty
from
theano.gpuarray.dnn
import
(
GpuDnnConvDesc
,
GpuDnnConv
,
GpuDnnConvGradW
,
GpuDnnConvGradI
,
version
,
get_precision
,
from
theano.gpuarray.dnn
import
GpuDnnConvDesc
,
GpuDnnConv
,
GpuDnnConvGradW
,
GpuDnnConvGradI
,
version
,
get_precision
DnnBase
,
handle_type
,
DNN_CONV_ALGO_CHOOSE_ONCE
,
DNN_CONV_ALGO_CHOOSE_TIME
)
from
theano.gpuarray.tests.check_dnn_doc
import
check_fwd_algorithm
from
theano.gpuarray.tests.config
import
mode_with_gpu
,
ref_cast
from
theano.gpuarray.tests.config
import
mode_with_gpu
,
ref_cast
from
theano.scalar
import
bool
as
bool_t
from
theano.tensor.nnet.abstract_conv
import
get_conv_output_shape
,
assert_conv_shape
from
theano.tensor.nnet.abstract_conv
import
get_conv_output_shape
,
assert_conv_shape
from
theano.tensor.opt
import
Assert
from
theano.tensor.opt
import
Assert
cudnn
=
cudnn_defs
.
get_definitions
(
version
(
raises
=
False
))
cudnn
=
cudnn_defs
.
get_definitions
(
version
(
raises
=
False
))
cudnnConvolutionFwdAlgo_t
=
cudnn
.
cudnnConvolutionFwdAlgo_t
cudnnConvolutionBwdFilterAlgo_t
=
cudnn
.
cudnnConvolutionBwdFilterAlgo_t
cudnnConvolutionBwdDataAlgo_t
=
cudnn
.
cudnnConvolutionBwdDataAlgo_t
AVAILABLE_PRECISIONS
=
cudnn
.
supported_precisions
(
theano
.
config
.
floatX
)
class
DnnCaseGenerator
:
class
DnnCaseGenerator
:
"""
"""
...
@@ -81,11 +76,11 @@ class DnnCaseGenerator:
...
@@ -81,11 +76,11 @@ class DnnCaseGenerator:
@staticmethod
@staticmethod
def
get_if_valid_conv_output_shape
(
case_tuple
):
def
get_if_valid_conv_output_shape
(
case_tuple
):
out_shp
=
get_conv_output_shape
(
case_tuple
[
0
]
[
0
]
,
# input shape
out_shp
=
get_conv_output_shape
(
case_tuple
[
0
],
# input shape
case_tuple
[
0
][
1
],
# filter shape
case_tuple
[
1
],
# filter shape
case_tuple
[
1
],
# border mode
case_tuple
[
4
],
# border mode
case_tuple
[
0
][
2
],
# subsample
case_tuple
[
2
],
# subsample
case_tuple
[
0
][
3
]
# dilation
case_tuple
[
3
]
# dilation
)
)
try
:
try
:
return
assert_conv_shape
(
out_shp
)
return
assert_conv_shape
(
out_shp
)
...
@@ -94,7 +89,7 @@ class DnnCaseGenerator:
...
@@ -94,7 +89,7 @@ class DnnCaseGenerator:
def
get_cases
(
self
):
def
get_cases
(
self
):
# Generate an iterator of tuples with format:
# Generate an iterator of tuples with format:
# (
(input shape, filter shape, subsample, dilation), border mode, convolution mode, alpha, beta
)
# (
input shape, filter shape, subsample, dilation, border mode, convolution mode, alpha, beta
)
all_batch_sizes
=
(
self
.
batch_size
,)
all_batch_sizes
=
(
self
.
batch_size
,)
all_input_channels
=
(
self
.
input_channels
,)
all_input_channels
=
(
self
.
input_channels
,)
all_input_sizes
=
self
.
_shapes
(
self
.
input_size
)
all_input_sizes
=
self
.
_shapes
(
self
.
input_size
)
...
@@ -114,7 +109,7 @@ class DnnCaseGenerator:
...
@@ -114,7 +109,7 @@ class DnnCaseGenerator:
all_filter_shapes
=
((
oc
,
ic
)
+
fis
all_filter_shapes
=
((
oc
,
ic
)
+
fis
for
oc
in
all_output_channels
for
ic
in
all_input_channels
for
fis
in
all_filter_sizes
)
for
oc
in
all_output_channels
for
ic
in
all_input_channels
for
fis
in
all_filter_sizes
)
return
ifilter
(
DnnCaseGenerator
.
get_if_valid_conv_output_shape
,
return
ifilter
(
DnnCaseGenerator
.
get_if_valid_conv_output_shape
,
product
(
product
(
all_input_shapes
,
all_filter_shapes
,
all_subsamples
,
all_dilations
)
,
product
(
all_input_shapes
,
all_filter_shapes
,
all_subsamples
,
all_dilations
,
all_border_modes
,
all_conv_modes
,
all_alphas
,
all_betas
))
all_border_modes
,
all_conv_modes
,
all_alphas
,
all_betas
))
...
@@ -142,14 +137,14 @@ def dnn_conv(img, kerns, alpha=1, beta=0, out=None, border_mode='valid', subsamp
...
@@ -142,14 +137,14 @@ def dnn_conv(img, kerns, alpha=1, beta=0, out=None, border_mode='valid', subsamp
desc_op
.
subsample
,
desc_op
.
subsample
,
filter_dilation
=
dilation
)
filter_dilation
=
dilation
)
out_shp
=
assert_conv_shape
(
out_shp
)
out_shp
=
assert_conv_shape
(
out_shp
)
if
beta
!=
0
:
if
beta
==
0
:
real_out
=
GpuAllocEmpty
(
dtype
=
img
.
dtype
,
context_name
=
ctx_name
)(
*
out_shp
)
else
:
assert
out
is
not
None
assert
out
is
not
None
out
=
as_gpuarray_variable
(
out
,
ctx_name
)
out
=
as_gpuarray_variable
(
out
,
ctx_name
)
out
=
gpu_contiguous
(
out
)
out
=
gpu_contiguous
(
out
)
check
=
Assert
(
'GpuDnnConv: qiven output (for beta not null) does not have expected shape'
)
check
=
Assert
(
'GpuDnnConv: qiven output (for beta not null) does not have expected shape'
)
real_out
=
check
(
out
,
theano
.
tensor
.
all
(
theano
.
tensor
.
eq
(
out
.
shape
,
out_shp
)))
real_out
=
check
(
out
,
theano
.
tensor
.
all
(
theano
.
tensor
.
eq
(
out
.
shape
,
out_shp
)))
else
:
real_out
=
GpuAllocEmpty
(
dtype
=
img
.
dtype
,
context_name
=
ctx_name
)(
*
out_shp
)
return
GpuDnnConv
(
algo
=
algo
)(
img
,
kerns
,
real_out
,
desc
,
alpha
,
beta
)
return
GpuDnnConv
(
algo
=
algo
)(
img
,
kerns
,
real_out
,
desc
,
alpha
,
beta
)
...
@@ -207,78 +202,28 @@ def dnn_gradinput(kerns, topgrad, img_shp, alpha=1, beta=0, out=None, border_mod
...
@@ -207,78 +202,28 @@ def dnn_gradinput(kerns, topgrad, img_shp, alpha=1, beta=0, out=None, border_mod
return
GpuDnnConvGradI
(
algo
=
algo
)(
kerns
,
topgrad
,
real_out
,
desc
,
alpha
,
beta
)
return
GpuDnnConvGradI
(
algo
=
algo
)(
kerns
,
topgrad
,
real_out
,
desc
,
alpha
,
beta
)
class
BaseGpuDnnConvChooseAlgo
(
DnnBase
):
def
check_fwd_dtype_config_support
(
dtype
,
precision
):
"""
inputs_shape
=
(
1
,
1
,
3
,
3
)
This class and its subclasses allow to retrieve a cuDNN algorithm
filters_shape
=
(
1
,
1
,
2
,
2
)
at runtime without any computation, given the user choose option
inputs
=
np
.
zeros
(
inputs_shape
,
dtype
=
dtype
)
(time_once, time_on_shape_change, guess_once or guess_on_shape_change).
filters
=
np
.
zeros
(
filters_shape
,
dtype
=
dtype
)
To help reduce whole test time, I suggest we use these classes when
inputs
=
theano
.
shared
(
inputs
)
algo is one of choose options, as any chosen algorithm would have
filters
=
theano
.
shared
(
filters
)
been tested by the other exhaustive tests.
conv
=
dnn_conv
(
inputs
,
filters
,
precision
=
precision
)
"""
f
=
theano
.
function
([],
conv
,
mode
=
mode_with_gpu
)
try
:
_f16_ok
=
True
f
()
check_input
=
False
except
RuntimeError
as
e
:
__props__
=
(
'choice'
,)
assert
'CUDNN_STATUS_ARCH_MISMATCH'
in
e
.
message
params_type
=
ParamsType
(
choose_once
=
bool_t
,
choose_time
=
bool_t
,
handle
=
handle_type
)
return
False
return
True
# Abstract attributes.
func_file
=
None
func_name
=
None
def
__init__
(
self
,
choice
):
COp
.
__init__
(
self
,
[
"../dnn_base.c"
,
"../dnn_conv_base.c"
,
self
.
func_file
],
self
.
func_name
)
assert
choice
in
SUPPORTED_DNN_CONV_ALGO_RUNTIME
self
.
choice
=
choice
self
.
choose_once
=
self
.
choice
in
DNN_CONV_ALGO_CHOOSE_ONCE
self
.
choose_time
=
self
.
choice
in
DNN_CONV_ALGO_CHOOSE_TIME
def
dnn_context
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context_name
def
_prepare_inputs
(
self
,
i1
,
name_i1
,
i2
,
name_i2
,
output
,
desc
):
ctx_name
=
infer_context_name
(
i1
,
i2
,
output
)
i1
=
as_gpuarray_variable
(
i1
,
ctx_name
)
i2
=
as_gpuarray_variable
(
i2
,
ctx_name
)
output
=
as_gpuarray_variable
(
output
,
ctx_name
)
if
i1
.
type
.
ndim
not
in
(
4
,
5
):
raise
TypeError
(
'
%
s must be 4D or 5D tensor'
%
name_i1
)
if
i2
.
type
.
ndim
not
in
(
4
,
5
):
raise
TypeError
(
'
%
s must be 4D or 5D tensor'
%
name_i2
)
if
output
.
type
.
ndim
not
in
(
4
,
5
):
raise
TypeError
(
'output must be 4D or 5D tensor'
)
if
i1
.
type
.
ndim
!=
i2
.
type
.
ndim
or
i1
.
type
.
ndim
!=
output
.
type
.
ndim
:
raise
TypeError
(
"The number of dimensions of
%
s,
%
s and output must match"
%
(
name_i1
,
name_i2
))
if
not
isinstance
(
desc
.
type
,
CDataType
)
or
desc
.
type
.
ctype
!=
'cudnnConvolutionDescriptor_t'
:
raise
TypeError
(
'desc must be cudnnConvolutionDescriptor_t'
)
return
(
i1
,
i2
,
output
,
desc
)
class
GpuDnnConvChooseFwdAlgo
(
BaseGpuDnnConvChooseAlgo
):
func_file
=
'dnn_choose_fwd.c'
func_name
=
'APPLY_SPECIFIC(choose_fwd_algo)'
def
make_node
(
self
,
img
,
kern
,
output
,
desc
):
img
,
kern
,
output
,
desc
=
self
.
_prepare_inputs
(
img
,
'img'
,
kern
,
'kern'
,
output
,
desc
)
return
Apply
(
self
,
[
img
,
kern
,
output
,
desc
],
[
cudnn
.
cudnnConvolutionFwdAlgo_t
()])
def
test_fwd_true_half_config_support
():
class
GpuDnnConvChooseBwdFilterAlgo
(
BaseGpuDnnConvChooseAlgo
):
# For cuDNN V5.1 and V6.0:
func_file
=
'dnn_choose_gw.c'
# "TRUE_HALF_CONFIG is only supported on architectures with true fp16 support (compute capability 5.3 and 6.0)"
func_name
=
'APPLY_SPECIFIC(choose_bwd_filter_algo)'
if
not
check_fwd_dtype_config_support
(
'float16'
,
'float16'
):
raise
SkipTest
(
'FWD: TRUE_HALF_CONFIG not supported on this GPU.'
)
def
make_node
(
self
,
img
,
topgrad
,
output
,
desc
):
img
,
topgrad
,
output
,
desc
=
self
.
_prepare_inputs
(
img
,
'img'
,
topgrad
,
'topgrad'
,
output
,
desc
)
return
Apply
(
self
,
[
img
,
topgrad
,
output
,
desc
],
[
cudnn
.
cudnnConvolutionBwdFilterAlgo_t
()])
class
GpuDnnConvChooseBwdDataAlgo
(
BaseGpuDnnConvChooseAlgo
):
func_file
=
'dnn_choose_gi.c'
func_name
=
'APPLY_SPECIFIC(choose_bwd_data_algo)'
def
make_node
(
self
,
kern
,
topgrad
,
output
,
desc
):
kern
,
topgrad
,
output
,
desc
=
self
.
_prepare_inputs
(
kern
,
'kern'
,
topgrad
,
'topgrad'
,
output
,
desc
)
return
Apply
(
self
,
[
kern
,
topgrad
,
output
,
desc
],
[
cudnn
.
cudnnConvolutionBwdDataAlgo_t
()])
class
BaseTestDnnConv
(
object
):
class
BaseTestDnnConv
(
object
):
...
@@ -287,10 +232,6 @@ class BaseTestDnnConv(object):
...
@@ -287,10 +232,6 @@ class BaseTestDnnConv(object):
to run actual tests.
to run actual tests.
"""
"""
_functions_checked_for_fwd
=
False
_functions_checked_for_gradinput
=
False
_functions_checked_for_gradweight
=
False
# Abstract attributes.
# Abstract attributes.
ndim
=
2
ndim
=
2
...
@@ -303,25 +244,25 @@ class BaseTestDnnConv(object):
...
@@ -303,25 +244,25 @@ class BaseTestDnnConv(object):
cpu_gradinput_class
=
None
cpu_gradinput_class
=
None
cpu_gradweight_class
=
None
cpu_gradweight_class
=
None
# Utility methods.
def
get_cases
(
self
):
def
get_cases
(
self
):
# Return an iterable of test cases. Each test case is a tuple (or list) with following syntax:
# Return an iterable of test cases. Each test case is a tuple (or list) with following syntax:
# (
(input shape, filter shape, subsample, dilation), border mode, convolution mode, alpha, beta
)
# (
input shape, filter shape, subsample, dilation, border mode, convolution mode, alpha, beta
)
generator
=
DnnCaseGenerator
(
ndim
=
self
.
ndim
)
generator
=
DnnCaseGenerator
(
ndim
=
self
.
ndim
)
return
generator
.
get_cases
()
return
generator
.
get_cases
()
# Run and utility methods.
def
array_like_conv_output
(
self
,
inputs_shape
,
filters_shape
,
border_mode
,
subsample
,
dilation
,
dtype
):
def
array_like_conv_output
(
self
,
inputs_shape
,
filters_shape
,
border_mode
,
subsample
,
dilation
):
# Return an random array with inferred convolution output shape.
# Return an random array with inferred convolution output shape.
out_shp
=
get_conv_output_shape
(
inputs_shape
,
filters_shape
,
border_mode
,
subsample
,
dilation
)
out_shp
=
get_conv_output_shape
(
inputs_shape
,
filters_shape
,
border_mode
,
subsample
,
dilation
)
out_shp
=
assert_conv_shape
(
out_shp
)
out_shp
=
assert_conv_shape
(
out_shp
)
return
np
.
random
.
random
(
out_shp
)
.
astype
(
theano
.
config
.
floatX
)
return
np
.
random
.
random
(
out_shp
)
.
astype
(
dtype
)
def
run_conv_fwd
(
self
,
algo
,
precision
,
parameters
):
def
run_conv_fwd
(
self
,
algo
,
dtype
,
precision
,
parameters
):
(
inputs_shape
,
filters_shape
,
subsample
,
dilation
)
,
border_mode
,
conv_mode
,
alpha
,
beta
=
parameters
inputs_shape
,
filters_shape
,
subsample
,
dilation
,
border_mode
,
conv_mode
,
alpha
,
beta
=
parameters
inputs_val
=
np
.
random
.
random
(
inputs_shape
)
.
astype
(
theano
.
config
.
floatX
)
inputs_val
=
np
.
random
.
random
(
inputs_shape
)
.
astype
(
dtype
)
filters_val
=
np
.
random
.
random
(
filters_shape
)
.
astype
(
theano
.
config
.
floatX
)
filters_val
=
np
.
random
.
random
(
filters_shape
)
.
astype
(
dtype
)
# Scale down the input values to prevent very large absolute errors
# Scale down the input values to prevent very large absolute errors
# due to float rounding
# due to float rounding
...
@@ -331,8 +272,11 @@ class BaseTestDnnConv(object):
...
@@ -331,8 +272,11 @@ class BaseTestDnnConv(object):
inputs
=
theano
.
shared
(
inputs_val
)
inputs
=
theano
.
shared
(
inputs_val
)
filters
=
theano
.
shared
(
filters_val
)
filters
=
theano
.
shared
(
filters_val
)
out
=
None
if
beta
==
0
else
self
.
array_like_conv_output
(
inputs_shape
,
filters_shape
,
border_mode
,
subsample
,
if
beta
==
0
:
dilation
)
out
=
None
else
:
out
=
self
.
array_like_conv_output
(
inputs_shape
,
filters_shape
,
border_mode
,
subsample
,
dilation
,
dtype
)
out
/=
10
# Compile a theano function for the cuDNN implementation
# Compile a theano function for the cuDNN implementation
conv
=
dnn_conv
(
img
=
inputs
,
kerns
=
filters
,
alpha
=
alpha
,
beta
=
beta
,
out
=
out
,
border_mode
=
border_mode
,
conv
=
dnn_conv
(
img
=
inputs
,
kerns
=
filters
,
alpha
=
alpha
,
beta
=
beta
,
out
=
out
,
border_mode
=
border_mode
,
subsample
=
subsample
,
dilation
=
dilation
,
conv_mode
=
conv_mode
,
algo
=
algo
,
precision
=
precision
)
subsample
=
subsample
,
dilation
=
dilation
,
conv_mode
=
conv_mode
,
algo
=
algo
,
precision
=
precision
)
...
@@ -354,15 +298,6 @@ class BaseTestDnnConv(object):
...
@@ -354,15 +298,6 @@ class BaseTestDnnConv(object):
filter_dilation
=
dilation
)(
ref_cast
(
inputs
),
flipped_filters
)
filter_dilation
=
dilation
)(
ref_cast
(
inputs
),
flipped_filters
)
f_ref
=
theano
.
function
([],
conv_ref
,
mode
=
"FAST_RUN"
)
f_ref
=
theano
.
function
([],
conv_ref
,
mode
=
"FAST_RUN"
)
if
not
self
.
_functions_checked_for_fwd
:
self
.
_functions_checked_for_fwd
=
True
assert
any
(
isinstance
(
node
.
op
,
GpuDnnConv
)
for
node
in
f
.
maker
.
fgraph
.
apply_nodes
)
assert
not
any
(
isinstance
(
node
.
op
,
(
GpuDnnConvGradI
,
GpuDnnConvGradW
))
for
node
in
f
.
maker
.
fgraph
.
apply_nodes
)
assert
not
any
(
isinstance
(
node
.
op
,
(
GpuDnnConv
,
GpuDnnConvGradW
,
GpuDnnConvGradI
))
for
node
in
f_ref
.
maker
.
fgraph
.
apply_nodes
)
# Compare the results of the two implementations
# Compare the results of the two implementations
res_ref
=
f_ref
()
res_ref
=
f_ref
()
res
=
f
()
res
=
f
()
...
@@ -371,19 +306,26 @@ class BaseTestDnnConv(object):
...
@@ -371,19 +306,26 @@ class BaseTestDnnConv(object):
utt
.
assert_allclose
(
res
,
res2
)
utt
.
assert_allclose
(
res
,
res2
)
# Raise tolerance for float16
# Raise tolerance for float16
rtol
=
6e-2
if
theano
.
config
.
floatX
==
'float16'
else
None
rtol
=
6e-2
if
dtype
==
'float16'
else
None
if
beta
==
0
:
if
beta
==
0
:
utt
.
assert_allclose
(
alpha
*
res_ref
,
res
,
rtol
=
rtol
)
utt
.
assert_allclose
(
alpha
*
res_ref
,
res
,
rtol
=
rtol
)
else
:
else
:
# print('(conv: beta not null) ', end='')
utt
.
assert_allclose
(
alpha
*
res_ref
+
beta
*
out
,
res
,
rtol
=
rtol
)
utt
.
assert_allclose
(
alpha
*
res_ref
+
beta
*
out
,
res
,
rtol
=
rtol
)
def
run_conv_gradinput
(
self
,
algo
,
precision
,
parameters
):
def
run_conv_gradinput
(
self
,
algo
,
dtype
,
precision
,
parameters
):
(
inputs_shape
,
filters_shape
,
subsample
,
dilation
),
border_mode
,
conv_mode
,
alpha
,
beta
=
parameters
inputs_shape
,
filters_shape
,
subsample
,
dilation
,
border_mode
,
conv_mode
,
alpha
,
beta
=
parameters
if
beta
==
0
:
inputs_val
=
None
else
:
inputs_val
=
np
.
random
.
random
(
inputs_shape
)
.
astype
(
dtype
)
inputs_val
/=
10
filters_val
=
np
.
random
.
random
(
filters_shape
)
.
astype
(
dtype
)
topgrad_val
=
self
.
array_like_conv_output
(
inputs_shape
,
filters_shape
,
border_mode
,
subsample
,
dilation
,
dtype
)
inputs_val
=
np
.
random
.
random
(
inputs_shape
)
.
astype
(
theano
.
config
.
floatX
)
# Scale down the input values to prevent absolute errors in utt.assert_allclose.
filters_val
=
np
.
random
.
random
(
filters_shape
)
.
astype
(
theano
.
config
.
floatX
)
filters_val
/=
10
topgrad_val
=
self
.
array_like_conv_output
(
inputs_shape
,
filters_shape
,
border_mode
,
subsample
,
dilation
)
topgrad_val
/=
10
filters
=
theano
.
shared
(
filters_val
)
filters
=
theano
.
shared
(
filters_val
)
topgrad
=
theano
.
shared
(
topgrad_val
)
topgrad
=
theano
.
shared
(
topgrad_val
)
...
@@ -412,15 +354,6 @@ class BaseTestDnnConv(object):
...
@@ -412,15 +354,6 @@ class BaseTestDnnConv(object):
)(
ref_cast
(
flipped_filters
),
ref_cast
(
topgrad
),
inputs_shape
[
2
:])
)(
ref_cast
(
flipped_filters
),
ref_cast
(
topgrad
),
inputs_shape
[
2
:])
f_ref
=
theano
.
function
([],
grad_i_ref
,
mode
=
"FAST_RUN"
)
f_ref
=
theano
.
function
([],
grad_i_ref
,
mode
=
"FAST_RUN"
)
if
not
self
.
_functions_checked_for_gradinput
:
self
.
_functions_checked_for_gradinput
=
True
assert
any
(
isinstance
(
node
.
op
,
GpuDnnConvGradI
)
for
node
in
f
.
maker
.
fgraph
.
apply_nodes
)
assert
not
any
(
isinstance
(
node
.
op
,
(
GpuDnnConv
,
GpuDnnConvGradW
))
for
node
in
f
.
maker
.
fgraph
.
apply_nodes
)
assert
not
any
(
isinstance
(
node
.
op
,
(
GpuDnnConv
,
GpuDnnConvGradW
,
GpuDnnConvGradI
))
for
node
in
f_ref
.
maker
.
fgraph
.
apply_nodes
)
# Compare the results of the two implementations
# Compare the results of the two implementations
res_ref
=
f_ref
()
res_ref
=
f_ref
()
res
=
f
()
res
=
f
()
...
@@ -429,15 +362,26 @@ class BaseTestDnnConv(object):
...
@@ -429,15 +362,26 @@ class BaseTestDnnConv(object):
utt
.
assert_allclose
(
res
,
res2
)
utt
.
assert_allclose
(
res
,
res2
)
# Raise tolerance for float16
# Raise tolerance for float16
rtol
=
5e-2
if
theano
.
config
.
floatX
==
'float16'
else
None
rtol
=
5e-2
if
dtype
==
'float16'
else
None
utt
.
assert_allclose
(
alpha
*
res_ref
+
beta
*
inputs_val
,
res
,
rtol
=
rtol
)
if
beta
==
0
:
utt
.
assert_allclose
(
alpha
*
res_ref
,
res
,
rtol
=
rtol
)
else
:
utt
.
assert_allclose
(
alpha
*
res_ref
+
beta
*
inputs_val
,
res
,
rtol
=
rtol
)
def
run_conv_gradweight
(
self
,
algo
,
precision
,
parameters
):
def
run_conv_gradweight
(
self
,
algo
,
dtype
,
precision
,
parameters
):
(
inputs_shape
,
filters_shape
,
subsample
,
dilation
)
,
border_mode
,
conv_mode
,
alpha
,
beta
=
parameters
inputs_shape
,
filters_shape
,
subsample
,
dilation
,
border_mode
,
conv_mode
,
alpha
,
beta
=
parameters
inputs_val
=
np
.
random
.
random
(
inputs_shape
)
.
astype
(
theano
.
config
.
floatX
)
inputs_val
=
np
.
random
.
random
(
inputs_shape
)
.
astype
(
dtype
)
filters_val
=
np
.
random
.
random
(
filters_shape
)
.
astype
(
theano
.
config
.
floatX
)
if
beta
==
0
:
topgrad_val
=
self
.
array_like_conv_output
(
inputs_shape
,
filters_shape
,
border_mode
,
subsample
,
dilation
)
filters_val
=
None
else
:
filters_val
=
np
.
random
.
random
(
filters_shape
)
.
astype
(
dtype
)
filters_val
/=
10
topgrad_val
=
self
.
array_like_conv_output
(
inputs_shape
,
filters_shape
,
border_mode
,
subsample
,
dilation
,
dtype
)
# Scale down the input values to prevent absolute errors in utt.assert_allclose.
inputs_val
/=
10
topgrad_val
/=
10
inputs
=
theano
.
shared
(
inputs_val
)
inputs
=
theano
.
shared
(
inputs_val
)
topgrad
=
theano
.
shared
(
topgrad_val
)
topgrad
=
theano
.
shared
(
topgrad_val
)
...
@@ -458,15 +402,6 @@ class BaseTestDnnConv(object):
...
@@ -458,15 +402,6 @@ class BaseTestDnnConv(object):
grad_w_ref
=
grad_w_ref
[:,
:,
::
-
1
,
::
-
1
,
::
-
1
]
grad_w_ref
=
grad_w_ref
[:,
:,
::
-
1
,
::
-
1
,
::
-
1
]
f_ref
=
theano
.
function
([],
grad_w_ref
,
mode
=
"FAST_RUN"
)
f_ref
=
theano
.
function
([],
grad_w_ref
,
mode
=
"FAST_RUN"
)
if
not
self
.
_functions_checked_for_gradweight
:
self
.
_functions_checked_for_gradweight
=
True
assert
any
(
isinstance
(
node
.
op
,
GpuDnnConvGradW
)
for
node
in
f
.
maker
.
fgraph
.
apply_nodes
)
assert
not
any
(
isinstance
(
node
.
op
,
(
GpuDnnConv
,
GpuDnnConvGradI
))
for
node
in
f
.
maker
.
fgraph
.
apply_nodes
)
assert
not
any
(
isinstance
(
node
.
op
,
(
GpuDnnConv
,
GpuDnnConvGradW
,
GpuDnnConvGradI
))
for
node
in
f_ref
.
maker
.
fgraph
.
apply_nodes
)
# Compare the results of the two implementations
# Compare the results of the two implementations
res_ref
=
f_ref
()
res_ref
=
f_ref
()
res
=
f
()
res
=
f
()
...
@@ -475,119 +410,58 @@ class BaseTestDnnConv(object):
...
@@ -475,119 +410,58 @@ class BaseTestDnnConv(object):
utt
.
assert_allclose
(
res
,
res2
)
utt
.
assert_allclose
(
res
,
res2
)
# Raise tolerance for float16
# Raise tolerance for float16
rtol
=
5e-2
if
theano
.
config
.
floatX
==
'float16'
else
None
rtol
=
5e-2
if
dtype
==
'float16'
else
None
utt
.
assert_allclose
(
alpha
*
res_ref
+
beta
*
filters_val
,
res
,
rtol
=
rtol
)
if
beta
==
0
:
utt
.
assert_allclose
(
alpha
*
res_ref
,
res
,
rtol
=
rtol
)
def
run_choose_runtime_algos
(
self
,
algo
,
precision
,
parameters
):
else
:
(
inputs_shape
,
filters_shape
,
subsample
,
dilation
),
border_mode
,
conv_mode
,
alpha
,
beta
=
parameters
utt
.
assert_allclose
(
alpha
*
res_ref
+
beta
*
filters_val
,
res
,
rtol
=
rtol
)
out_shp
=
assert_conv_shape
(
get_conv_output_shape
(
inputs_shape
,
filters_shape
,
border_mode
,
subsample
,
dilation
))
inputs_val
=
np
.
random
.
random
(
inputs_shape
)
.
astype
(
theano
.
config
.
floatX
)
filters_val
=
np
.
random
.
random
(
filters_shape
)
.
astype
(
theano
.
config
.
floatX
)
topgrad_val
=
self
.
array_like_conv_output
(
inputs_shape
,
filters_shape
,
border_mode
,
subsample
,
dilation
)
inputs
=
theano
.
shared
(
inputs_val
)
filters
=
theano
.
shared
(
filters_val
)
topgrad
=
theano
.
shared
(
topgrad_val
)
ctx_name
=
infer_context_name
(
inputs
,
topgrad
)
desc_filter
=
GpuDnnConvDesc
(
border_mode
=
border_mode
,
subsample
=
subsample
,
dilation
=
dilation
,
conv_mode
=
conv_mode
,
precision
=
precision
)(
filters_shape
)
array_like_filters
=
GpuAllocEmpty
(
dtype
=
inputs
.
dtype
,
context_name
=
ctx_name
)(
*
filters_shape
)
array_like_inputs
=
GpuAllocEmpty
(
dtype
=
inputs
.
dtype
,
context_name
=
ctx_name
)(
*
inputs_shape
)
array_like_conv_output
=
GpuAllocEmpty
(
dtype
=
inputs
.
dtype
,
context_name
=
ctx_name
)(
*
out_shp
)
algo_filter
=
GpuDnnConvChooseBwdFilterAlgo
(
algo
)(
inputs
,
topgrad
,
array_like_filters
,
desc_filter
)
algo_input
=
GpuDnnConvChooseBwdDataAlgo
(
algo
)(
filters
,
topgrad
,
array_like_inputs
,
desc_filter
)
algo_conv
=
GpuDnnConvChooseFwdAlgo
(
algo
)(
inputs
,
filters
,
array_like_conv_output
,
desc_filter
)
f
=
theano
.
function
([],
[
algo_filter
,
algo_input
,
algo_conv
],
mode
=
mode_with_gpu
)
# Just test that it runs.
algo_filter_val
,
algo_input_val
,
algo_conv_val
=
f
()
# How to test if it "works" ?
def
get_expected_tcount
(
self
):
def
get_expected_tcount
(
self
):
"""
"""
Utility function to get expected test count
Utility function to get expected test count
without actually run nosetests.
without actually run nosetests.
"""
"""
len_cases
=
0
len_cases
=
sum
(
1
for
case
in
self
.
get_cases
())
for
c
in
self
.
get_cases
():
count_contexts
=
0
len_cases
+=
1
for
dtype
,
precision
in
cudnn
.
get_fwd_dtype_configs
(
check_runtime
=
check_fwd_dtype_config_support
):
print
(
len_cases
,
'conv cases for
%
dD'
%
self
.
ndim
)
algos
=
(
algo
for
algo
in
self
.
fwd_algorithms
return
len
(
AVAILABLE_PRECISIONS
)
*
len_cases
*
len
(
self
.
fwd_algorithms
+
if
cudnn
.
fwd_algo_supports_dtype_config
(
algo
,
dtype
,
precision
,
self
.
ndim
))
self
.
bwd_data_algorithms
+
count_contexts
+=
sum
(
1
for
algo
in
algos
)
+
len
(
SUPPORTED_DNN_CONV_ALGO_RUNTIME
)
self
.
bwd_filter_algorithms
+
for
dtype
,
precision
in
cudnn
.
get_bwd_data_dtype_configs
():
SUPPORTED_DNN_CONV_ALGO_RUNTIME
)
algos
=
(
algo
for
algo
in
self
.
bwd_data_algorithms
if
cudnn
.
bwd_data_algo_supports_dtype_config
(
algo
,
dtype
,
precision
,
self
.
ndim
))
count_contexts
+=
sum
(
1
for
algo
in
algos
)
+
len
(
SUPPORTED_DNN_CONV_ALGO_RUNTIME
)
for
dtype
,
precision
in
cudnn
.
get_bwd_filter_dtype_configs
():
algos
=
(
algo
for
algo
in
self
.
bwd_filter_algorithms
if
cudnn
.
bwd_filter_algo_supports_dtype_config
(
algo
,
dtype
,
precision
,
self
.
ndim
))
count_contexts
+=
sum
(
1
for
algo
in
algos
)
+
len
(
SUPPORTED_DNN_CONV_ALGO_RUNTIME
)
return
len_cases
*
count_contexts
# Iterable test methods.
# Iterable test methods.
def
test_fwd
(
self
):
def
test_fwd
(
self
):
for
precision
,
algo
,
parameters
in
product
(
AVAILABLE_PRECISIONS
,
self
.
fwd_algorithms
,
self
.
get_cases
()):
for
dtype
,
precision
in
cudnn
.
get_fwd_dtype_configs
(
check_runtime
=
check_fwd_dtype_config_support
):
yield
(
self
.
run_conv_fwd
,
algo
,
precision
,
parameters
)
algos
=
(
algo
for
algo
in
self
.
fwd_algorithms
if
cudnn
.
fwd_algo_supports_dtype_config
(
algo
,
dtype
,
precision
,
self
.
ndim
))
for
algo
in
chain
(
algos
,
SUPPORTED_DNN_CONV_ALGO_RUNTIME
):
for
parameters
in
self
.
get_cases
():
yield
(
self
.
run_conv_fwd
,
algo
,
dtype
,
precision
,
parameters
)
def
test_gradinput
(
self
):
def
test_gradinput
(
self
):
for
precision
,
algo
,
parameters
in
product
(
AVAILABLE_PRECISIONS
,
self
.
bwd_data_algorithms
,
self
.
get_cases
()):
for
dtype
,
precision
in
cudnn
.
get_bwd_data_dtype_configs
():
yield
(
self
.
run_conv_gradinput
,
algo
,
precision
,
parameters
)
algos
=
(
algo
for
algo
in
self
.
bwd_data_algorithms
if
cudnn
.
bwd_data_algo_supports_dtype_config
(
algo
,
dtype
,
precision
,
self
.
ndim
))
for
algo
in
chain
(
algos
,
SUPPORTED_DNN_CONV_ALGO_RUNTIME
):
for
parameters
in
self
.
get_cases
():
yield
(
self
.
run_conv_gradinput
,
algo
,
dtype
,
precision
,
parameters
)
def
test_gradweight
(
self
):
def
test_gradweight
(
self
):
for
precision
,
algo
,
parameters
in
product
(
AVAILABLE_PRECISIONS
,
self
.
bwd_filter_algorithms
,
self
.
get_cases
()):
for
dtype
,
precision
in
cudnn
.
get_bwd_filter_dtype_configs
():
yield
(
self
.
run_conv_gradweight
,
algo
,
precision
,
parameters
)
algos
=
(
algo
for
algo
in
self
.
bwd_filter_algorithms
if
cudnn
.
bwd_filter_algo_supports_dtype_config
(
algo
,
dtype
,
precision
,
self
.
ndim
))
def
test_choose_runtime_algos
(
self
):
for
algo
in
chain
(
algos
,
SUPPORTED_DNN_CONV_ALGO_RUNTIME
):
for
precision
,
algo
,
parameters
in
product
(
AVAILABLE_PRECISIONS
,
SUPPORTED_DNN_CONV_ALGO_RUNTIME
,
for
parameters
in
self
.
get_cases
():
self
.
get_cases
()):
yield
(
self
.
run_conv_gradweight
,
algo
,
dtype
,
precision
,
parameters
)
yield
(
self
.
run_choose_runtime_algos
,
algo
,
precision
,
parameters
)
def
check_fwd_predictions
(
self
):
"""
Call this method to check if tests fail when they
don't follow cuDNN V5.1 doc conditions for FWD algorithms.
Script will exit as soon as there is a test that does not fail when expected.
"""
print
()
print
(
'TESTING FWD FAILURES PREDICTED FOR
%
dD'
%
self
.
ndim
)
count
=
0
for
precision
,
algo
,
parameters
in
product
(
AVAILABLE_PRECISIONS
,
self
.
fwd_algorithms
,
self
.
get_cases
()):
(
inputs_shape
,
filters_shape
,
subsample
,
dilation
),
border_mode
,
conv_mode
,
alpha
,
beta
=
parameters
inputs_val
=
np
.
random
.
random
(
inputs_shape
)
.
astype
(
theano
.
config
.
floatX
)
filters_val
=
np
.
random
.
random
(
filters_shape
)
.
astype
(
theano
.
config
.
floatX
)
# Scale down the input values to prevent very large absolute errors
# due to float rounding
inputs_val
/=
10
filters_val
/=
10
out
=
self
.
array_like_conv_output
(
inputs_shape
,
filters_shape
,
border_mode
,
subsample
,
dilation
)
desc_op
=
GpuDnnConvDesc
(
border_mode
=
border_mode
,
subsample
=
subsample
,
dilation
=
dilation
,
conv_mode
=
conv_mode
,
precision
=
precision
)
should_compute
=
check_fwd_algorithm
(
inputs_val
,
filters_val
,
out
,
desc_op
,
algo
,
precision
,
subsample
,
dilation
)
if
not
should_compute
.
ok
:
infos
=
[
'ndim :
%
s'
%
(
len
(
inputs_shape
)
-
2
),
'precision :
%
s'
%
precision
]
infos
+=
should_compute
.
messages
try
:
self
.
run_conv_fwd
(
algo
,
precision
,
parameters
)
except
Exception
as
e
:
print
(
'(FAILS as expected)'
,
algo
,
precision
,
parameters
)
print
(
e
.
message
.
split
(
'
\n
'
)[
0
])
for
info
in
infos
:
print
(
info
)
# exit(0)
else
:
print
(
'**SHOULD FAIL**|'
,
algo
,
precision
,
parameters
)
for
info
in
infos
:
print
(
info
)
exit
(
-
1
)
count
+=
1
if
count
%
200
==
0
:
print
(
count
,
'passed'
)
print
(
count
,
'finished'
)
class
TestDnnConv2D
(
BaseTestDnnConv
):
class
TestDnnConv2D
(
BaseTestDnnConv
):
...
@@ -615,23 +489,39 @@ class TestDnnConv3D(BaseTestDnnConv):
...
@@ -615,23 +489,39 @@ class TestDnnConv3D(BaseTestDnnConv):
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
def
dtype_config_to_str
(
dtype_config
):
dtype
,
precision
=
dtype_config
if
dtype
==
precision
==
'float16'
:
return
'TRUE_HALF_CONFIG'
if
dtype
==
'float16'
and
precision
==
'float32'
:
return
'PSEUDO_HALF_CONFIG'
if
dtype
==
precision
==
'float32'
:
return
'FLOAT_CONFIG'
if
dtype
==
precision
==
'float64'
:
return
'DOUBLE_CONFIG'
raise
ValueError
test_2d
=
TestDnnConv2D
()
test_2d
=
TestDnnConv2D
()
test_3d
=
TestDnnConv3D
()
test_3d
=
TestDnnConv3D
()
print
()
print
(
'Available data type configurations :'
,
', '
.
join
(
dtype_config_to_str
(
d
)
for
d
in
cudnn
.
get_supported_dtype_configs
()))
print
()
print
(
'2D algorithms:'
)
print
(
'2D algorithms:'
)
print
(
'FWD :'
,
test_2d
.
fwd_algorithms
)
print
(
'FWD :'
,
', '
.
join
(
test_2d
.
fwd_algorithms
))
print
(
'BWD FILTER:'
,
test_2d
.
bwd_filter_algorithms
)
print
(
'BWD FILTER :'
,
', '
.
join
(
test_2d
.
bwd_filter_algorithms
))
print
(
'BWD DATA :'
,
test_2d
.
bwd_data_algorithms
)
print
(
'BWD DATA :'
,
', '
.
join
(
test_2d
.
bwd_data_algorithms
))
print
()
print
(
'3D algorithms:'
)
print
(
'3D algorithms:'
)
print
(
'FWD :'
,
test_3d
.
fwd_algorithms
)
print
(
'FWD :'
,
', '
.
join
(
test_3d
.
fwd_algorithms
))
print
(
'BWD FILTER:'
,
test_3d
.
bwd_filter_algorithms
)
print
(
'BWD FILTER :'
,
', '
.
join
(
test_3d
.
bwd_filter_algorithms
))
print
(
'BWD DATA :'
,
test_3d
.
bwd_data_algorithms
)
print
(
'BWD DATA :'
,
', '
.
join
(
test_3d
.
bwd_data_algorithms
))
print
()
count_tests_2d
=
test_2d
.
get_expected_tcount
()
count_tests_2d
=
test_2d
.
get_expected_tcount
()
count_tests_3d
=
test_3d
.
get_expected_tcount
()
count_tests_3d
=
test_3d
.
get_expected_tcount
()
print
(
count_tests_2d
,
'total cases for 2D.'
)
print
(
count_tests_2d
,
'conv2D test cases.'
)
print
(
count_tests_3d
,
'total cases for 3D.'
)
print
(
count_tests_3d
,
'conv3D test cases.'
)
print
(
count_tests_2d
+
count_tests_3d
,
'total cases.'
)
print
(
count_tests_2d
+
count_tests_3d
,
'total conv test cases.'
)
import
sys
print
()
nose
.
main
(
defaultTest
=
'theano.gpuarray.tests.check_dnn'
)
if
len
(
sys
.
argv
)
==
2
and
sys
.
argv
[
1
]
==
'run'
:
test_2d
.
check_fwd_predictions
()
test_3d
.
check_fwd_predictions
()
theano/gpuarray/tests/check_dnn_doc.py
deleted
100644 → 0
浏览文件 @
6cc0c5ca
"""
This module is just a collection of definitions to be used by `check_dnn.py`.
Following classes, functions and definitions are used to check if
tests fail as expected when conditions listed into cuDNN documentation are not verified.
I have currently implemented checking only for 2D/3D FWD algorithms in cuDNN V5.1,
and in practice, many tests pass even when they don't follow cuDNN doc conditions.
So, I think we should better just run all tests and check ourselves
which tests pass, which fail, and why they fail.
Reminder:
N: batch number
C: number of feature maps
D: depth
H: height
W: width
NB: We assume that we **always** use NC(D)HW tensors in Theano.
"""
from
__future__
import
absolute_import
,
print_function
,
division
import
theano
from
..cudnn_defs
import
HALF
,
FLOAT
,
DOUBLE
,
get_definitions
from
..dnn
import
version
UNKNOWN
,
TRUE_HALF_CONFIG
,
PSEUDO_HALF_CONFIG
,
FLOAT_CONFIG
,
DOUBLE_CONFIG
=
-
1
,
0
,
1
,
2
,
3
cudnn
=
get_definitions
(
version
(
raises
=
False
))
cudnnConvolutionFwdAlgo_t
=
cudnn
.
cudnnConvolutionFwdAlgo_t
class
Success
:
ok
=
True
messages
=
[]
def
__init__
(
self
,
messages
=
[]):
self
.
messages
=
list
(
messages
)
def
add_message
(
self
,
*
parts
):
self
.
messages
.
append
(
''
.
join
(
str
(
part
)
for
part
in
parts
))
class
Failure
(
Success
):
ok
=
False
def
_and
(
*
tests
):
# `tests` is a list of tuples with format (lambda test, test description)
messages
=
[]
for
test_lambda
,
message
in
tests
:
if
not
test_lambda
():
messages
.
append
(
message
)
return
Failure
(
messages
)
if
messages
else
Success
()
def
_or
(
*
tests
):
messages
=
[]
ok
=
False
for
test_lambda
,
message
in
tests
:
if
test_lambda
():
ok
=
True
break
else
:
messages
.
append
(
message
)
return
Success
()
if
ok
else
Failure
(
messages
)
def
type_conf
(
precision
):
# All Op's input tensors are floatX tensors.
floatX
=
theano
.
config
.
floatX
if
floatX
==
precision
==
HALF
:
return
TRUE_HALF_CONFIG
if
floatX
==
HALF
and
precision
==
FLOAT
:
return
PSEUDO_HALF_CONFIG
if
floatX
==
precision
==
FLOAT
:
return
FLOAT_CONFIG
if
floatX
==
precision
==
DOUBLE
:
return
DOUBLE_CONFIG
return
UNKNOWN
# raise ValueError('Unknown data type configuration (%s %s)' % (floatX, precision))
def
type_conf_to_string
(
conf
):
if
conf
==
-
1
:
return
'UNKNOWN'
if
conf
==
0
:
return
'TRUE_HALF_CONFIG'
if
conf
==
1
:
return
'PSEUDO_HALF_CONFIG'
if
conf
==
2
:
return
'FLOAT_CONFIG'
if
conf
==
3
:
return
'DOUBLE_CONFIG'
def
strideof
(
tensor
,
i
):
return
tensor
.
strides
[
i
]
//
tensor
.
itemsize
def
tensor_is_partially_packed
(
tensor
,
packed_dim_names
):
if
tensor
.
ndim
==
4
:
dim_names
=
'NCHW'
else
:
dim_names
=
'NCDHW'
packed_dims
=
[]
unpacked_dims
=
[]
for
i
in
range
(
tensor
.
ndim
-
1
):
if
dim_names
[
i
]
in
packed_dim_names
:
packed_dims
.
append
(
i
)
else
:
unpacked_dims
.
append
(
i
)
if
dim_names
[
tensor
.
ndim
-
1
]
in
packed_dim_names
and
strideof
(
tensor
,
-
1
)
!=
1
:
# We won't put last dimension in the list of packed dims.
# We just need to check if stride of that dimension is 1.
return
False
return
(
all
(
strideof
(
tensor
,
i
)
>=
tensor
.
shape
[
i
+
1
]
*
strideof
(
tensor
,
i
+
1
)
for
i
in
unpacked_dims
)
and
all
(
strideof
(
tensor
,
i
)
==
tensor
.
shape
[
i
+
1
]
*
strideof
(
tensor
,
i
+
1
)
for
i
in
packed_dims
))
def
tensor_is_fully_packed
(
tensor
):
return
strideof
(
tensor
,
-
1
)
==
1
and
all
(
strideof
(
tensor
,
i
)
==
tensor
.
shape
[
i
+
1
]
*
strideof
(
tensor
,
i
+
1
)
for
i
in
range
(
tensor
.
ndim
-
1
))
def
check_fwd_algorithm
(
img
,
kern
,
out
,
desc_op
,
algo
,
precision
,
subsample
,
dilation
):
# Based on cuDNN v5.1 user guide.
ndim
=
img
.
ndim
-
2
if
ndim
==
2
:
# rD won't be used.
rD
,
rH
,
rW
=
-
1
,
0
,
1
else
:
rD
,
rH
,
rW
=
0
,
1
,
2
algo
=
cudnnConvolutionFwdAlgo_t
.
fromalias
(
algo
)
kern_shape
=
kern
.
shape
[
2
:]
kern_shape
=
tuple
((
kern_shape
[
i
]
-
1
)
*
dilation
[
i
]
+
1
for
i
in
range
(
len
(
dilation
)))
pad
=
(
desc_op
.
pad0
,
desc_op
.
pad1
,
desc_op
.
pad2
)[:
len
(
kern_shape
)]
if
desc_op
.
bmode
==
'full'
:
pad
=
tuple
(
kern_shape
[
i
]
-
1
for
i
in
range
(
len
(
pad
)))
elif
desc_op
.
bmode
==
'half'
:
pad
=
tuple
(
kern_shape
[
i
]
//
2
for
i
in
range
(
len
(
pad
)))
img_shape
=
img
.
shape
[
2
:]
img_with_borders
=
tuple
(
img_shape
[
i
]
+
2
*
pad
[
i
]
for
i
in
range
(
len
(
pad
)))
def
check_algo
():
if
algo
==
cudnnConvolutionFwdAlgo_t
.
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
:
return
_and
((
lambda
:
type_conf
(
precision
)
!=
TRUE_HALF_CONFIG
,
"Data Type Config Support: All except TRUE_HALF_CONFIG"
))
# CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM: 2D: everything supported.
if
ndim
==
3
and
algo
==
cudnnConvolutionFwdAlgo_t
.
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
:
return
_and
(
(
lambda
:
type_conf
(
precision
)
!=
TRUE_HALF_CONFIG
,
"Data Type Config Support: All except TRUE_HALF_CONFIG"
),
(
lambda
:
tensor_is_fully_packed
(
img
),
"xDesc Format Support: NCDHW-fully-packed"
),
(
lambda
:
tensor_is_fully_packed
(
out
),
"yDesc Format Support: NCDHW-fully-packed"
),
)
if
algo
==
cudnnConvolutionFwdAlgo_t
.
CUDNN_CONVOLUTION_FWD_ALGO_GEMM
:
return
_and
(
(
lambda
:
type_conf
(
precision
)
!=
TRUE_HALF_CONFIG
,
"Data Type Config Support: All except TRUE_HALF_CONFIG"
),
(
lambda
:
ndim
==
2
,
"Only for conv2d"
)
)
# CUDNN_CONVOLUTION_FWD_ALGO_DIRECT: not implemented.
if
algo
==
cudnnConvolutionFwdAlgo_t
.
CUDNN_CONVOLUTION_FWD_ALGO_FFT
:
return
_and
(
(
lambda
:
type_conf
(
precision
)
in
(
PSEUDO_HALF_CONFIG
,
FLOAT_CONFIG
),
"Data Type Config Support: PSEUDO_HALF_CONFIG, FLOAT_CONFIG"
),
(
lambda
:
ndim
==
2
,
"Only for conv2d"
),
(
lambda
:
tensor_is_partially_packed
(
img
,
'HW'
),
"xDesc Format Support: NCHW HW-packed"
),
(
lambda
:
tensor_is_partially_packed
(
out
,
'HW'
),
"yDesc Format Support: NCHW HW-packed"
),
(
lambda
:
img_with_borders
[
rH
]
<=
256
,
"xDesc 's feature map height + 2 * convDesc 's zero-padding height must equal 256 or less"
),
(
lambda
:
img_with_borders
[
rW
]
<=
256
,
"xDesc 's feature map width + 2 * convDesc 's zero-padding width must equal 256 or less"
),
(
lambda
:
subsample
[
rH
]
==
subsample
[
rW
]
==
1
,
"convDesc 's vertical and horizontal filter stride must equal 1"
),
(
lambda
:
kern_shape
[
rH
]
>
pad
[
rH
],
"wDesc 's filter height must be greater than convDesc 's zero-padding height"
),
(
lambda
:
kern_shape
[
rW
]
>
pad
[
rW
],
"wDesc 's filter width must be greater than convDesc 's zero-padding width"
)
)
if
algo
==
cudnnConvolutionFwdAlgo_t
.
CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
:
if
ndim
==
2
:
return
_and
(
(
lambda
:
type_conf
(
precision
)
in
(
PSEUDO_HALF_CONFIG
,
FLOAT_CONFIG
),
"Data Type Config Support: PSEUDO_HALF_CONFIG, FLOAT_CONFIG"
),
(
lambda
:
tensor_is_partially_packed
(
img
,
'HW'
),
"xDesc Format Support: NCHW HW-packed"
),
(
lambda
:
tensor_is_partially_packed
(
out
,
'HW'
),
"yDesc Format Support: NCHW HW-packed"
),
(
lambda
:
kern_shape
[
rH
]
<=
32
,
"wDesc 's filter height must equal 32 or less"
),
(
lambda
:
kern_shape
[
rW
]
<=
32
,
"wDesc 's filter width must equal 32 or less"
),
(
lambda
:
subsample
[
rH
]
==
subsample
[
rW
]
==
1
,
"convDesc 's vertical and horizontal filter stride must equal 1"
),
(
lambda
:
pad
[
rH
]
<
kern_shape
[
rH
],
"wDesc 's filter height must be greater than convDesc 's zero-padding height"
),
(
lambda
:
pad
[
rW
]
<
kern_shape
[
rW
],
"wDesc 's filter width must be greater than convDesc 's zero-padding width"
),
)
if
ndim
==
3
:
return
_and
(
(
lambda
:
type_conf
(
precision
)
!=
TRUE_HALF_CONFIG
,
"Data Type Config Support: All except TRUE_HALF_CONFIG"
),
(
lambda
:
tensor_is_partially_packed
(
img
,
'DHW'
),
"xDesc Format Support: NCDHW DHW-packed"
),
(
lambda
:
tensor_is_partially_packed
(
out
,
'DHW'
),
"yDesc Format Support: NCDHW DHW-packed"
),
(
lambda
:
kern_shape
[
rH
]
<=
16
,
"wDesc 's filter height must equal 16 or less"
),
(
lambda
:
kern_shape
[
rW
]
<=
16
,
"wDesc 's filter width must equal 16 or less"
),
(
lambda
:
kern_shape
[
rD
]
<=
16
,
"wDesc 's filter depth must equal 16 or less"
),
(
lambda
:
all
(
s
==
1
for
s
in
subsample
),
"convDesc 's must have all filter strides equal to 1"
),
(
lambda
:
pad
[
rH
]
<
kern_shape
[
rH
],
"wDesc 's filter height must be greater than convDesc 's zero-padding height"
),
(
lambda
:
pad
[
rW
]
<
kern_shape
[
rW
],
"wDesc 's filter width must be greater than convDesc 's zero-padding width"
),
(
lambda
:
pad
[
rW
]
<
kern_shape
[
rD
],
"wDesc 's filter depth must be greater than convDesc 's zero-padding width"
),
)
if
algo
==
cudnnConvolutionFwdAlgo_t
.
CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD
:
return
_and
(
(
lambda
:
type_conf
(
precision
)
in
(
PSEUDO_HALF_CONFIG
,
FLOAT_CONFIG
,
DOUBLE_CONFIG
),
"Data Type Config Support: PSEUDO_HALF_CONFIG, FLOAT_CONFIG"
),
(
lambda
:
ndim
==
2
,
"Only for conv2d"
),
(
lambda
:
subsample
[
rH
]
==
subsample
[
rW
]
==
1
,
"convDesc 's vertical and horizontal filter stride must equal 1"
),
(
lambda
:
kern_shape
[
rH
]
==
3
,
"wDesc 's filter height must be 3"
),
(
lambda
:
kern_shape
[
rW
]
==
3
,
"wDesc 's filter width must be 3"
),
)
if
algo
==
cudnnConvolutionFwdAlgo_t
.
CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED
:
data_type_conf
=
type_conf
(
precision
)
return
_and
(
(
lambda
:
data_type_conf
!=
DOUBLE_CONFIG
,
"Data Type Config Support: All except DOUBLE_CONFIG"
),
(
lambda
:
ndim
==
2
,
"Only for conv2d"
),
(
lambda
:
subsample
[
rH
]
==
subsample
[
rW
]
==
1
,
"convDesc 's vertical and horizontal filter stride must equal 1"
),
(
lambda
:
kern_shape
[
rH
]
==
kern_shape
[
rW
]
and
kern_shape
[
rH
]
in
(
3
,
5
),
"wDesc 's filter (height, width) must be (3,3) or (5,5)"
),
(
lambda
:
kern_shape
[
rH
]
==
3
or
data_type_conf
!=
TRUE_HALF_CONFIG
,
"If wDesc 's filter (height, width) is (5,5), "
"data type config TRUE_HALF_CONFIG is not supported"
)
)
checking
=
check_algo
()
if
not
checking
.
ok
:
messages
=
checking
.
messages
checking
.
messages
=
[]
checking
.
add_message
(
'config : '
,
type_conf_to_string
(
type_conf
(
precision
)))
checking
.
add_message
(
'computed borders : '
,
pad
)
checking
.
add_message
(
'img with borders : '
,
img_with_borders
)
checking
.
add_message
(
'computed kern shape: '
,
kern_shape
)
checking
.
add_message
(
'== why should fail =='
)
checking
.
messages
+=
messages
return
checking
theano/gpuarray/tests/dnn_choose_fwd.c
deleted
100644 → 0
浏览文件 @
6cc0c5ca
#section init_code_struct
reuse_algo
=
0
;
prev_algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
;
if
(
!
PARAMS
->
choose_once
)
{
memset
(
prev_img_dims
,
0
,
sizeof
(
prev_img_dims
));
memset
(
prev_kern_dims
,
0
,
sizeof
(
prev_kern_dims
));
}
#section support_code_struct
int
reuse_algo
;
cudnnConvolutionFwdAlgo_t
prev_algo
;
size_t
prev_img_dims
[
5
];
size_t
prev_kern_dims
[
5
];
int
APPLY_SPECIFIC
(
choose_fwd_algo
)(
PyGpuArrayObject
*
input
,
PyGpuArrayObject
*
kerns
,
PyGpuArrayObject
*
output
,
cudnnConvolutionDescriptor_t
desc
,
cudnnConvolutionFwdAlgo_t
*
output_algo
,
PARAMS_TYPE
*
params
)
{
PyGpuContextObject
*
c
=
input
->
context
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
])
{
PyErr_SetString
(
PyExc_ValueError
,
"images and kernel must have the same stack size"
);
return
1
;
}
if
(
c_set_tensorNd
(
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
if
(
c_set_tensorNd
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
cudnnConvolutionFwdAlgo_t
algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
;
cuda_enter
(
c
->
ctx
);
int
expected_output_dims
[
5
]
=
{
0
};
err
=
cudnnGetConvolutionNdForwardOutputDim
(
desc
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_NDIM
(
input
),
expected_output_dims
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error computing convolution output dim: %s"
,
cudnnGetErrorString
(
err
));
cuda_exit
(
c
->
ctx
);
return
1
;
}
if
(
PyGpuArray_NDIM
(
input
)
==
4
)
{
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ld"
,
expected_output_dims
[
0
],
expected_output_dims
[
1
],
expected_output_dims
[
2
],
expected_output_dims
[
3
],
PyGpuArray_DIMS
(
output
)[
0
],
PyGpuArray_DIMS
(
output
)[
1
],
PyGpuArray_DIMS
(
output
)[
2
],
PyGpuArray_DIMS
(
output
)[
3
]);
cuda_exit
(
c
->
ctx
);
return
1
;
}
}
else
if
(
PyGpuArray_NDIM
(
input
)
==
5
)
{
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
])
||
(
PyGpuArray_DIMS
(
output
)[
4
]
!=
expected_output_dims
[
4
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ldx%ld"
,
expected_output_dims
[
0
],
expected_output_dims
[
1
],
expected_output_dims
[
2
],
expected_output_dims
[
3
],
expected_output_dims
[
4
],
PyGpuArray_DIMS
(
output
)[
0
],
PyGpuArray_DIMS
(
output
)[
1
],
PyGpuArray_DIMS
(
output
)[
2
],
PyGpuArray_DIMS
(
output
)[
3
],
PyGpuArray_DIMS
(
output
)[
4
]);
cuda_exit
(
c
->
ctx
);
return
1
;
}
}
if
(
!
params
->
choose_once
)
{
reuse_algo
=
1
;
for
(
unsigned
int
i
=
0
;
i
<
PyGpuArray_NDIM
(
input
);
++
i
)
{
reuse_algo
=
(
reuse_algo
&&
PyGpuArray_DIM
(
input
,
i
)
==
prev_img_dims
[
i
]);
reuse_algo
=
(
reuse_algo
&&
PyGpuArray_DIM
(
kerns
,
i
)
==
prev_kern_dims
[
i
]);
}
}
if
(
!
reuse_algo
)
{
size_t
free
;
int
err2
=
gpucontext_property
(
c
->
ctx
,
GA_CTX_PROP_LARGEST_MEMBLOCK
,
&
free
);
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"memory information on the GPU"
);
cuda_exit
(
c
->
ctx
);
return
1
;
}
// Guess 4Mb if the info is not available
if
(
free
==
0
)
free
=
4
*
1024
*
1024
;
if
(
params
->
choose_time
)
{
int
count
;
cudnnConvolutionFwdAlgoPerf_t
choice
;
gpudata
*
tmpmem
;
tmpmem
=
gpudata_alloc
(
c
->
ctx
,
free
,
NULL
,
0
,
NULL
);
if
(
tmpmem
==
NULL
)
{
PyErr_SetString
(
PyExc_MemoryError
,
"Could not allocate working GPU memory"
);
return
-
1
;
}
err
=
cudnnFindConvolutionForwardAlgorithmEx
(
params
->
handle
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
desc
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
1
,
&
count
,
&
choice
,
*
(
void
**
)
tmpmem
,
free
);
gpudata_release
(
tmpmem
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error selecting convolution algo: %s"
,
cudnnGetErrorString
(
err
));
cuda_exit
(
c
->
ctx
);
return
1
;
}
algo
=
choice
.
algo
;
}
else
{
err
=
cudnnGetConvolutionForwardAlgorithm
(
params
->
handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
desc
,
APPLY_SPECIFIC
(
output
),
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
,
free
,
&
algo
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error selecting convolution algo: %s"
,
cudnnGetErrorString
(
err
));
cuda_exit
(
c
->
ctx
);
return
1
;
}
}
prev_algo
=
algo
;
}
else
{
algo
=
prev_algo
;
}
if
(
params
->
choose_once
)
{
reuse_algo
=
1
;
}
else
{
for
(
unsigned
int
i
=
0
;
i
<
PyGpuArray_NDIM
(
input
);
++
i
)
{
prev_img_dims
[
i
]
=
PyGpuArray_DIM
(
input
,
i
);
prev_kern_dims
[
i
]
=
PyGpuArray_DIM
(
kerns
,
i
);
}
}
/* These two algos are not supported for 3d conv */
if
(
PyGpuArray_NDIM
(
input
)
==
5
&&
(
algo
==
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
||
algo
==
CUDNN_CONVOLUTION_FWD_ALGO_GEMM
))
algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
;
// The FFT implementation does not support strides, 1x1 filters or inputs
// with a spatial dimension larger than 1024. The tiled-FFT implementation
// does not support strides.
// If the chosen implementation is FFT or tiled-FFT, validate that it can
// be used on the current data and default to a safe implementation if it
// can't.
// The following code is 2d-specific but it is fine as FFT and tiled-FFT are
// defined only for 2d filters
if
((
algo
==
CUDNN_CONVOLUTION_FWD_ALGO_FFT
||
algo
==
CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
)
&&
PyGpuArray_NDIM
(
input
)
==
4
)
{
// Extract the properties of the convolution descriptor
int
nd
;
int
pad
[
2
];
int
stride
[
2
];
int
dilation
[
2
];
cudnnConvolutionMode_t
mode
;
cudnnDataType_t
data_type
;
err
=
cudnnGetConvolutionNdDescriptor
(
desc
,
2
,
&
nd
,
pad
,
stride
,
dilation
,
&
mode
,
&
data_type
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error getting convolution properties: %s"
,
cudnnGetErrorString
(
err
));
cuda_exit
(
c
->
ctx
);
return
1
;
}
if
(
algo
==
CUDNN_CONVOLUTION_FWD_ALGO_FFT
)
{
if
(
stride
[
0
]
!=
1
||
stride
[
1
]
!=
1
||
PyGpuArray_DIM
(
input
,
2
)
>
1024
||
PyGpuArray_DIM
(
input
,
3
)
>
1024
||
(
PyGpuArray_DIM
(
kerns
,
2
)
==
1
&&
PyGpuArray_DIM
(
kerns
,
3
)
==
1
))
{
algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
;
}
}
else
{
// algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
if
(
stride
[
0
]
!=
1
||
stride
[
1
]
!=
1
)
{
algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
;
}
}
}
*
output_algo
=
algo
;
cuda_exit
(
c
->
ctx
);
return
0
;
}
theano/gpuarray/tests/dnn_choose_gi.c
deleted
100644 → 0
浏览文件 @
6cc0c5ca
#section init_code_struct
reuse_algo
=
0
;
prev_algo
=
CUDNN_CONVOLUTION_BWD_DATA_ALGO_0
;
if
(
!
PARAMS
->
choose_once
)
{
memset
(
prev_kern_dims
,
0
,
sizeof
(
prev_kern_dims
));
memset
(
prev_top_dims
,
0
,
sizeof
(
prev_top_dims
));
}
#section support_code_struct
int
reuse_algo
;
cudnnConvolutionBwdDataAlgo_t
prev_algo
;
size_t
prev_kern_dims
[
5
];
size_t
prev_top_dims
[
5
];
int
APPLY_SPECIFIC
(
choose_bwd_data_algo
)(
PyGpuArrayObject
*
kerns
,
PyGpuArrayObject
*
output
,
PyGpuArrayObject
*
input
,
cudnnConvolutionDescriptor_t
desc
,
cudnnConvolutionBwdDataAlgo_t
*
output_algo
,
PARAMS_TYPE
*
params
)
{
PyGpuContextObject
*
c
=
kerns
->
context
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
])
{
PyErr_SetString
(
PyExc_ValueError
,
"images and kernel must have the same stack size"
);
return
1
;
}
if
(
c_set_tensorNd
(
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
if
(
c_set_tensorNd
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
cudnnConvolutionBwdDataAlgo_t
algo
=
CUDNN_CONVOLUTION_BWD_DATA_ALGO_0
;
cuda_enter
(
c
->
ctx
);
int
expected_output_dims
[
5
]
=
{
0
};
err
=
cudnnGetConvolutionNdForwardOutputDim
(
desc
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_NDIM
(
input
),
expected_output_dims
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error computing convolution output dim: %s"
,
cudnnGetErrorString
(
err
));
cuda_exit
(
c
->
ctx
);
return
1
;
}
if
(
PyGpuArray_NDIM
(
input
)
==
4
)
{
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ld"
,
expected_output_dims
[
0
],
expected_output_dims
[
1
],
expected_output_dims
[
2
],
expected_output_dims
[
3
],
PyGpuArray_DIMS
(
output
)[
0
],
PyGpuArray_DIMS
(
output
)[
1
],
PyGpuArray_DIMS
(
output
)[
2
],
PyGpuArray_DIMS
(
output
)[
3
]);
cuda_exit
(
c
->
ctx
);
return
1
;
}
}
else
if
(
PyGpuArray_NDIM
(
input
)
==
5
)
{
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
])
||
(
PyGpuArray_DIMS
(
output
)[
4
]
!=
expected_output_dims
[
4
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ldx%ld"
,
expected_output_dims
[
0
],
expected_output_dims
[
1
],
expected_output_dims
[
2
],
expected_output_dims
[
3
],
expected_output_dims
[
4
],
PyGpuArray_DIMS
(
output
)[
0
],
PyGpuArray_DIMS
(
output
)[
1
],
PyGpuArray_DIMS
(
output
)[
2
],
PyGpuArray_DIMS
(
output
)[
3
],
PyGpuArray_DIMS
(
output
)[
4
]);
cuda_exit
(
c
->
ctx
);
return
1
;
}
}
if
(
!
params
->
choose_once
)
{
reuse_algo
=
1
;
for
(
unsigned
int
i
=
0
;
i
<
PyGpuArray_NDIM
(
kerns
);
++
i
)
{
reuse_algo
=
(
reuse_algo
&&
PyGpuArray_DIM
(
kerns
,
i
)
==
prev_kern_dims
[
i
]);
reuse_algo
=
(
reuse_algo
&&
PyGpuArray_DIM
(
output
,
i
)
==
prev_top_dims
[
i
]);
}
}
if
(
!
reuse_algo
)
{
size_t
free
;
int
err2
=
gpucontext_property
(
c
->
ctx
,
GA_CTX_PROP_LARGEST_MEMBLOCK
,
&
free
);
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"memory information on the GPU"
);
cuda_exit
(
c
->
ctx
);
return
1
;
}
// Guess 4Mb if the info is not available
if
(
free
==
0
)
free
=
4
*
1024
*
1024
;
if
(
params
->
choose_time
)
{
int
count
;
cudnnConvolutionBwdDataAlgoPerf_t
choice
;
gpudata
*
tmpmem
;
tmpmem
=
gpudata_alloc
(
c
->
ctx
,
free
,
NULL
,
0
,
NULL
);
if
(
tmpmem
==
NULL
)
{
PyErr_SetString
(
PyExc_MemoryError
,
"Could not allocate working GPU memory"
);
return
-
1
;
}
err
=
cudnnFindConvolutionBackwardDataAlgorithmEx
(
params
->
handle
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
desc
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
1
,
&
count
,
&
choice
,
*
(
void
**
)
tmpmem
,
free
);
gpudata_release
(
tmpmem
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error selecting convolution algo: %s"
,
cudnnGetErrorString
(
err
));
cuda_exit
(
c
->
ctx
);
return
1
;
}
algo
=
choice
.
algo
;
}
else
{
err
=
cudnnGetConvolutionBackwardDataAlgorithm
(
params
->
handle
,
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
input
),
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
,
free
,
&
algo
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error selecting convolution algo: %s"
,
cudnnGetErrorString
(
err
));
cuda_exit
(
c
->
ctx
);
return
1
;
}
}
prev_algo
=
algo
;
}
else
{
algo
=
prev_algo
;
}
if
(
params
->
choose_once
)
{
reuse_algo
=
1
;
}
else
{
for
(
unsigned
int
i
=
0
;
i
<
PyGpuArray_NDIM
(
kerns
);
++
i
)
{
prev_kern_dims
[
i
]
=
PyGpuArray_DIM
(
kerns
,
i
);
prev_top_dims
[
i
]
=
PyGpuArray_DIM
(
output
,
i
);
}
}
// The FFT implementation does not support strides, 1x1 filters or inputs
// with a spatial dimension larger than 1024. The tiled-FFT implementation
// does not support strides.
// If the chosen implementation is FFT or tiled-FFT, validate that it can
// be used on the current data and default to a safe implementation if it
// can't.
// The following code is 2d-specific but it is fine as FFT and tiled-FFT are
// defined only for 2d filters
if
((
algo
==
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING
||
algo
==
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT
)
&&
PyGpuArray_NDIM
(
kerns
)
==
4
)
{
// Extract the properties of the convolution descriptor
int
nd
;
int
pad
[
2
];
int
stride
[
2
];
int
upscale
[
2
];
cudnnConvolutionMode_t
mode
;
cudnnDataType_t
data_type
;
err
=
cudnnGetConvolutionNdDescriptor
(
desc
,
2
,
&
nd
,
pad
,
stride
,
upscale
,
&
mode
,
&
data_type
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error getting convolution properties: %s"
,
cudnnGetErrorString
(
err
));
cuda_exit
(
c
->
ctx
);
return
1
;
}
if
(
algo
==
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT
)
{
if
(
stride
[
0
]
!=
1
||
stride
[
1
]
!=
1
||
PyGpuArray_DIM
(
input
,
2
)
>
1024
||
PyGpuArray_DIM
(
input
,
3
)
>
1024
||
(
PyGpuArray_DIM
(
kerns
,
2
)
==
1
&&
PyGpuArray_DIM
(
kerns
,
3
)
==
1
))
{
algo
=
CUDNN_CONVOLUTION_BWD_DATA_ALGO_0
;
}
}
else
{
// algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING
if
(
stride
[
0
]
!=
1
||
stride
[
1
]
!=
1
)
{
algo
=
CUDNN_CONVOLUTION_BWD_DATA_ALGO_0
;
}
}
}
*
output_algo
=
algo
;
cuda_exit
(
c
->
ctx
);
return
0
;
}
theano/gpuarray/tests/dnn_choose_gw.c
deleted
100644 → 0
浏览文件 @
6cc0c5ca
#section init_code_struct

/* One-time initialization of the algorithm cache held by this Op instance.
   Start with no cached choice and the safest default algorithm. */
reuse_algo = 0;
prev_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
if (!PARAMS->choose_once) {
  /* When the algorithm may be re-chosen per call, the previous input/output
     shapes are compared against the current ones to decide whether the cached
     algorithm can be reused; zero them so the first call never matches. */
  memset(prev_img_dims, 0, sizeof(prev_img_dims));
  memset(prev_top_dims, 0, sizeof(prev_top_dims));
}
#section support_code_struct

/* Per-Op-instance cache for the chosen backward-filter algorithm. */
int reuse_algo;                            /* non-zero: prev_algo is valid for reuse */
cudnnConvolutionBwdFilterAlgo_t prev_algo; /* last algorithm selected */
size_t prev_img_dims[5];                   /* input dims at the time of the last choice */
size_t prev_top_dims[5];                   /* output (gradient) dims at the last choice */
/* Select a cuDNN convolution backward-filter algorithm (gradient w.r.t. the
 * kernels) for the given input, output-gradient and kernel arrays.
 *
 * input       : images (4d or 5d GPU array)
 * output      : gradient of the convolution output (same ndim as input)
 * kerns       : kernels/filters whose gradient will be computed
 * desc        : convolution descriptor (padding, strides, mode, dtype)
 * output_algo : receives the chosen algorithm
 * params      : Op parameters (cuDNN handle, choose_once / choose_time flags)
 *
 * Returns 0 on success, non-zero on error with a Python exception set.
 *
 * The choice is cached in the struct-level prev_* variables: with
 * params->choose_once the first choice is kept forever; otherwise it is
 * reused only while the input/output shapes stay identical.
 */
int APPLY_SPECIFIC(choose_bwd_filter_algo)(PyGpuArrayObject *input,
                                           PyGpuArrayObject *output,
                                           PyGpuArrayObject *kerns,
                                           cudnnConvolutionDescriptor_t desc,
                                           cudnnConvolutionBwdFilterAlgo_t *output_algo,
                                           PARAMS_TYPE *params) {
  PyGpuContextObject *c = input->context;
  cudnnStatus_t err = CUDNN_STATUS_SUCCESS;

  /* Channel count of images and kernels must agree. */
  if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(kerns)[1]) {
    PyErr_SetString(PyExc_ValueError,
                    "GpuDnnConv images and kernel must have the same stack size");
    return 1;
  }

  /* Fill the cuDNN tensor/filter descriptors from the array metadata. */
  if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
    return 1;
  if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
    return 1;
  if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
    return 1;

  cudnnConvolutionBwdFilterAlgo_t algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;

  cuda_enter(c->ctx);

  /* Ask cuDNN what output shape this convolution would produce and verify
     that the gradient we received actually has that shape. */
  int expected_output_dims[5] = {0};
  err = cudnnGetConvolutionNdForwardOutputDim(desc, APPLY_SPECIFIC(input),
                                              APPLY_SPECIFIC(kerns),
                                              PyGpuArray_NDIM(input),
                                              expected_output_dims);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError, "error computing convolution output dim: %s",
                 cudnnGetErrorString(err));
    cuda_exit(c->ctx);
    return 1;
  }
  if (PyGpuArray_NDIM(input) == 4) {
    if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
        (PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
        (PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
        (PyGpuArray_DIMS(output)[3] != expected_output_dims[3])) {
      PyErr_Format(PyExc_ValueError,
                   "impossible convolution output dim: expected %ldx%ldx%ldx%ld"
                   " but received gradient with shape %ldx%ldx%ldx%ld",
                   expected_output_dims[0], expected_output_dims[1],
                   expected_output_dims[2], expected_output_dims[3],
                   PyGpuArray_DIMS(output)[0], PyGpuArray_DIMS(output)[1],
                   PyGpuArray_DIMS(output)[2], PyGpuArray_DIMS(output)[3]);
      cuda_exit(c->ctx);
      return 1;
    }
  } else if (PyGpuArray_NDIM(input) == 5) {
    if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
        (PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
        (PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
        (PyGpuArray_DIMS(output)[3] != expected_output_dims[3]) ||
        (PyGpuArray_DIMS(output)[4] != expected_output_dims[4])) {
      PyErr_Format(PyExc_ValueError,
                   "impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
                   " but received gradient with shape %ldx%ldx%ldx%ldx%ld",
                   expected_output_dims[0], expected_output_dims[1],
                   expected_output_dims[2], expected_output_dims[3],
                   expected_output_dims[4],
                   PyGpuArray_DIMS(output)[0], PyGpuArray_DIMS(output)[1],
                   PyGpuArray_DIMS(output)[2], PyGpuArray_DIMS(output)[3],
                   PyGpuArray_DIMS(output)[4]);
      cuda_exit(c->ctx);
      return 1;
    }
  }

  /* In per-shape caching mode, reuse the cached algorithm only if every
     input and output dimension matches the previous call. */
  if (!params->choose_once) {
    reuse_algo = 1;
    for (unsigned int i = 0; i < PyGpuArray_NDIM(input); ++i) {
      reuse_algo = (reuse_algo && PyGpuArray_DIM(input, i) == prev_img_dims[i]);
      reuse_algo = (reuse_algo && PyGpuArray_DIM(output, i) == prev_top_dims[i]);
    }
  }

  if (!reuse_algo) {
    /* Budget the algorithm search with the largest allocatable block. */
    size_t free;
    int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_LARGEST_MEMBLOCK, &free);
    if (err2 != GA_NO_ERROR) {
      PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
                   "memory information on the GPU");
      cuda_exit(c->ctx);
      return 1;
    }
    // Guess 4Mb if the info is not available
    if (free == 0)
      free = 4 * 1024 * 1024;

    if (params->choose_time) {
      /* Benchmark the candidate algorithms on the real data and keep the
         fastest one (count is fixed to 1, so choice holds the winner). */
      int count;
      cudnnConvolutionBwdFilterAlgoPerf_t choice;
      gpudata *tmpmem;

      tmpmem = gpudata_alloc(c->ctx, free, NULL, 0, NULL);
      if (tmpmem == NULL) {
        PyErr_SetString(PyExc_MemoryError,
                        "Could not allocate working GPU memory");
        /* Bug fix: this path previously returned -1 without leaving the CUDA
           context entered by cuda_enter() above. Balance the enter/exit and
           use the same error code (1) as every other failure path. */
        cuda_exit(c->ctx);
        return 1;
      }
      err = cudnnFindConvolutionBackwardFilterAlgorithmEx(
        params->handle, APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
        APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output), desc,
        APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns), 1, &count, &choice,
        *(void **)tmpmem, free);
      gpudata_release(tmpmem);
      if (err != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError, "error selecting convolution algo: %s",
                     cudnnGetErrorString(err));
        cuda_exit(c->ctx);
        return 1;
      }
      algo = choice.algo;
    } else {
      /* Heuristic selection constrained by the available workspace size. */
      err = cudnnGetConvolutionBackwardFilterAlgorithm(
        params->handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), desc,
        APPLY_SPECIFIC(kerns),
        CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, free, &algo);
      if (err != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError, "error selecting convolution algo: %s",
                     cudnnGetErrorString(err));
        cuda_exit(c->ctx);
        return 1;
      }
    }
    prev_algo = algo;
  } else {
    algo = prev_algo;
  }

  /* Update the cache bookkeeping for the next call. */
  if (params->choose_once) {
    reuse_algo = 1;
  } else {
    for (unsigned int i = 0; i < PyGpuArray_NDIM(input); i++) {
      prev_img_dims[i] = PyGpuArray_DIM(input, i);
      prev_top_dims[i] = PyGpuArray_DIM(output, i);
    }
  }

  // The FFT implementation does not support strides, 1x1 filters or inputs
  // with a spatial dimension larger than 1024.
  // If the chosen implementation is FFT, validate that it can
  // be used on the current data and default to a safe implementation if it
  // can't.
  // The following code is 2d-specific but it is fine as FFT and tiled-FFT are
  // defined only for 2d filters
  if (algo == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT &&
      PyGpuArray_NDIM(input) == 4) {
    // Extract the properties of the convolution descriptor
    int nd;
    int pad[2];
    int stride[2];
    int upscale[2];
    cudnnConvolutionMode_t mode;
    cudnnDataType_t data_type;
    err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride, upscale,
                                          &mode, &data_type);
    if (err != CUDNN_STATUS_SUCCESS) {
      PyErr_Format(PyExc_RuntimeError, "error getting convolution properties: %s",
                   cudnnGetErrorString(err));
      cuda_exit(c->ctx);
      return 1;
    }
    if (stride[0] != 1 || stride[1] != 1 ||
        PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 ||
        (PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1)) {
      algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
    }
  }

  *output_algo = algo;
  cuda_exit(c->ctx);
  return 0;
}
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论