Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
1b6e6389
提交
1b6e6389
authored
6月 07, 2017
作者:
notoraptor
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Cancel some changes into CEnumType.
Cancel changes into dnn_fwd.c. Heavy simplification of check_dnn. Make check_dnn runnable as a Python script.
上级
6cc0c5ca
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
7 个修改的文件
包含
119 行增加
和
642 行删除
+119
-642
type.py
theano/gof/type.py
+1
-25
cudnn_defs.py
theano/gpuarray/cudnn_defs.py
+118
-13
check_dnn.py
theano/gpuarray/tests/check_dnn.py
+0
-0
check_dnn_doc.py
theano/gpuarray/tests/check_dnn_doc.py
+0
-0
dnn_choose_fwd.c
theano/gpuarray/tests/dnn_choose_fwd.c
+0
-212
dnn_choose_gi.c
theano/gpuarray/tests/dnn_choose_gi.c
+0
-202
dnn_choose_gw.c
theano/gpuarray/tests/dnn_choose_gw.c
+0
-190
没有找到文件。
theano/gof/type.py
浏览文件 @
1b6e6389
...
...
@@ -909,11 +909,7 @@ class EnumType(Type, dict):
.. note::
:class:`EnumType` is not complete and should never be used for regular graph operations.
:class:`EnumList` is not complete and should never be used for regular graph operations.
**:class:`CEnumType` is complete.**
This Type (and subclasses) is not complete and should never be used for regular graph operations.
"""
...
...
@@ -1053,9 +1049,6 @@ class EnumType(Type, dict):
#ifndef PyInt_AsLong
#define PyInt_AsLong PyLong_AsLong
#endif
#ifndef PyInt_FromLong
#define PyInt_FromLong PyLong_FromLong
#endif
#endif
"""
...
...
@@ -1248,22 +1241,5 @@ class CEnumType(EnumList):
"""
%
dict
(
i
=
i
,
name
=
name
,
constant_cname
=
swapped_dict
[
i
])
for
i
in
sorted
(
swapped_dict
.
keys
())),
fail
=
sub
[
'fail'
])
def c_sync(self, name, sub):
    """
    Return the C code that converts the C enum value held in variable ``name``
    into a Python integer stored in ``py_<name>``.

    The generated code is a ``switch`` with one ``case`` per entry of ``self``
    (entries are ordered by their Python value). An unknown C value sets a
    ``ValueError`` and runs the failure code found in ``sub['fail']``.
    """
    failure_code = sub['fail']
    case_template = """
            case %(constant_cname)s: py_value = %(constant_pyvalue)d; break;
            """
    entries_by_value = sorted(self.items(), key=lambda t: t[1])
    switch_cases = ''.join(
        case_template % dict(constant_cname=cname, constant_pyvalue=pyvalue)
        for cname, pyvalue in entries_by_value)
    return """
        int py_value = -1;
        Py_XDECREF(py_%(name)s);
        /* We assume that ctype is an integer type usable in a switch. */
        switch (%(name)s) {
            %(cases)s
            default:
                PyErr_SetString(PyExc_ValueError, "CEnumType: cannot map C value to Python constant.");
                {%(fail)s}
                break;
        }
        py_%(name)s = PyInt_FromLong(py_value);
        """ % dict(name=name, fail=failure_code, cases=switch_cases)
def c_code_cache_version(self):
    """
    Return the C code cache version: ``1`` paired with the version reported
    by the parent class of ``CEnumType``.
    """
    parent_version = super(CEnumType, self).c_code_cache_version()
    return (1, parent_version)
theano/gpuarray/cudnn_defs.py
浏览文件 @
1b6e6389
...
...
@@ -19,8 +19,27 @@ from __future__ import absolute_import, print_function, division
from
theano.gof
import
CEnumType
# Names of the floating-point data types used in the configurations below.
HALF = 'float16'
FLOAT = 'float32'
DOUBLE = 'float64'

# Data type configurations, as (dtype, precision) pairs.
TRUE_HALF_CONFIG = (HALF, HALF)
PSEUDO_HALF_CONFIG = (HALF, FLOAT)
FLOAT_CONFIG = (FLOAT, FLOAT)
DOUBLE_CONFIG = (DOUBLE, DOUBLE)
def is_true_half_config(dtype, precision):
    """Tell whether ``dtype`` and ``precision`` are both equal to ``HALF``."""
    return dtype == precision and precision == HALF
def is_pseudo_half_config(dtype, precision):
    """Tell whether ``dtype`` is ``HALF`` while ``precision`` is ``FLOAT``."""
    if dtype == HALF:
        return precision == FLOAT
    return False
def is_float_config(dtype, precision):
    """Tell whether ``dtype`` and ``precision`` are both equal to ``FLOAT``."""
    return dtype == precision and precision == FLOAT
def is_double_config(dtype, precision):
    """Tell whether ``dtype`` and ``precision`` are both equal to ``DOUBLE``."""
    return dtype == precision and precision == DOUBLE
# NB: Some cuDNN algorithms are listed in cuDNN enums but not implemented.
...
...
@@ -103,22 +122,97 @@ class CuDNNV51(object):
# Empty enum list, so that we don't crash with cuDNN 5.
cudnnReduceTensorOp_t
=
CEnumType
()
def supported_precisions(self, dtype):
    """
    Return the tuple of precisions supported by cuDNN for given input data type.

    This is currently convenient for both cuDNN V5.1 and V6, as Theano does not
    yet support new data types (like INT8, INT8x4, etc.).

    ``dtype`` must be one of ``HALF``, ``FLOAT`` or ``DOUBLE``.
    """
    assert dtype in (HALF, FLOAT, DOUBLE)
    if dtype == HALF:
        # TRUE_HALF_CONFIG, PSEUDO_HALF_CONFIG
        return (HALF, FLOAT)
    if dtype == FLOAT:
        # FLOAT_CONFIG
        return (FLOAT,)
    if dtype == DOUBLE:
        # DOUBLE_CONFIG
        return (DOUBLE,)


def get_supported_dtype_configs(self):
    """
    Return the tuple of data type configurations supported by this version of cuDNN.

    This is currently convenient for both cuDNN V5.1 and V6, as Theano does not
    yet support new data types (like INT8, INT8x4, etc.).
    """
    return (TRUE_HALF_CONFIG, PSEUDO_HALF_CONFIG, FLOAT_CONFIG, DOUBLE_CONFIG)
def get_fwd_dtype_configs(self, check_runtime=None):
    """
    Return the data type configurations usable for forward convolution.

    ``check_runtime``, when given, is called with the two members of
    ``TRUE_HALF_CONFIG``; a false result removes that configuration from
    the returned tuple. Without ``check_runtime`` every configuration from
    ``get_supported_dtype_configs()`` is returned.
    """
    # NB: "TRUE_HALF_CONFIG is only supported on architectures with true fp16 support
    # (compute capability 5.3 and 6.0)". Can be checked at runtime only.
    true_half_allowed = check_runtime is None or check_runtime(*TRUE_HALF_CONFIG)
    if not true_half_allowed:
        return (PSEUDO_HALF_CONFIG, FLOAT_CONFIG, DOUBLE_CONFIG)
    return self.get_supported_dtype_configs()
def get_bwd_filter_dtype_configs(self, check_runtime=None):
    """
    Return the data type configurations usable for the backward filter
    convolution: everything reported by ``get_supported_dtype_configs()``.

    ``check_runtime`` is accepted but not used here.
    """
    configs = self.get_supported_dtype_configs()
    return configs
def get_bwd_data_dtype_configs(self, check_runtime=None):
    """
    Return the data type configurations usable for the backward data
    convolution: everything reported by ``get_supported_dtype_configs()``.

    ``check_runtime`` is accepted but not used here.
    """
    configs = self.get_supported_dtype_configs()
    return configs
def fwd_algo_supports_dtype_config(self, algo, dtype, precision, ndim):
    """
    Tell whether forward convolution algorithm ``algo`` accepts the data type
    configuration ``(dtype, precision)`` when working with ``ndim`` dimensions.

    ``algo`` is normalized with ``cudnnConvolutionFwdAlgo_t.fromalias`` before
    being compared to the enum constants. ``ndim`` is compared against 2 and 3
    (presumably the number of spatial dimensions of the convolution).
    Algorithms without a rule below are reported as unsupported.
    """
    enum = self.cudnnConvolutionFwdAlgo_t
    chosen = enum.fromalias(algo)

    def uses_true_half():
        return is_true_half_config(dtype, precision)

    def uses_pseudo_half_or_float():
        return (is_pseudo_half_config(dtype, precision) or
                is_float_config(dtype, precision))

    if chosen == enum.CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM:
        return not uses_true_half()

    if chosen == enum.CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM:
        return ndim == 2 or not uses_true_half()

    if chosen == enum.CUDNN_CONVOLUTION_FWD_ALGO_GEMM:
        return ndim == 2 and not uses_true_half()

    # CUDNN_CONVOLUTION_FWD_ALGO_DIRECT: not implemented.

    if chosen == enum.CUDNN_CONVOLUTION_FWD_ALGO_FFT:
        return ndim == 2 and uses_pseudo_half_or_float()

    if chosen == enum.CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING:
        if ndim == 2:
            return uses_pseudo_half_or_float()
        # NB: For cuDNN V6:
        # " Data Type Config Support: PSEUDO_HALF_CONFIG, FLOAT_CONFIG
        # (DOUBLE_CONFIG is also supported when the task can be handled by 1D FFT,
        # ie, one of the filter dimension, width or height is 1)"
        # Could be checked only when being in C code.
        if ndim == 3:
            return not uses_true_half()

    if chosen == enum.CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD:
        return ndim == 2 and uses_pseudo_half_or_float()

    if chosen == enum.CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED:
        # NB: "If wDesc 's filter (height, width) is (5,5), data type config TRUE_HALF_CONFIG is not supported".
        # We could not check it before being in C code.
        return ndim == 2 and not is_double_config(dtype, precision)

    return False
def bwd_filter_algo_supports_dtype_config(self, algo, dtype, precision, ndim):
    """
    Tell whether backward filter convolution algorithm ``algo`` accepts the
    data type configuration ``(dtype, precision)`` when working with ``ndim``
    dimensions.

    ``algo`` is normalized with ``cudnnConvolutionBwdFilterAlgo_t.fromalias``
    before being compared to the enum constants. Algorithms without a rule
    below are reported as unsupported.
    """
    enum = self.cudnnConvolutionBwdFilterAlgo_t
    chosen = enum.fromalias(algo)

    def uses_true_half():
        return is_true_half_config(dtype, precision)

    if chosen == enum.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0:
        return not uses_true_half()

    if chosen == enum.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1:
        return ndim == 2

    if chosen == enum.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT:
        return ndim == 2 and (is_pseudo_half_config(dtype, precision) or
                              is_float_config(dtype, precision))

    if chosen == enum.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3:
        return not uses_true_half()

    if chosen == enum.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED:
        # NB: "If wDesc 's filter (height, width) is (5,5), data type config TRUE_HALF_CONFIG is not supported".
        # We could not check it before being in C code.
        return ndim == 2 and not is_double_config(dtype, precision)

    return False
def bwd_data_algo_supports_dtype_config(self, algo, dtype, precision, ndim):
    """
    Tell whether backward data convolution algorithm ``algo`` accepts the
    data type configuration ``(dtype, precision)`` when working with ``ndim``
    dimensions.

    ``algo`` is normalized with ``cudnnConvolutionBwdDataAlgo_t.fromalias``
    before being compared to the enum constants. ``ndim`` is compared against
    2 and 3 (presumably the number of spatial dimensions of the convolution).
    Algorithms without a rule below are reported as unsupported.
    """
    algorithms = self.cudnnConvolutionBwdDataAlgo_t
    algo = algorithms.fromalias(algo)

    if algo == algorithms.CUDNN_CONVOLUTION_BWD_DATA_ALGO_0:
        return not is_true_half_config(dtype, precision)

    if algo == algorithms.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1:
        # CUDNN_CONVOLUTION_BWD_DATA_ALGO_1: all data type configs supported.
        # Without this explicit branch the algorithm fell through to the
        # final ``return False``.
        return True

    if algo == algorithms.CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT:
        return ndim == 2 and (is_pseudo_half_config(dtype, precision) or
                              is_float_config(dtype, precision))

    if algo == algorithms.CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING:
        if ndim == 2:
            return (is_pseudo_half_config(dtype, precision) or
                    is_float_config(dtype, precision))
        # NB: For cuDNN V6: "(DOUBLE_CONFIG is also supported when the task can be handled by 1D FFT,
        # ie, one of the filter dimension, width or height is 1)"
        # Could be checked only when being in C code.
        if ndim == 3:
            return not is_true_half_config(dtype, precision)

    if algo == algorithms.CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD:
        # The parentheses matter: ``and`` binds tighter than ``or``, so without
        # them a float configuration was accepted whatever ``ndim`` was.
        return ndim == 2 and (is_pseudo_half_config(dtype, precision) or
                              is_float_config(dtype, precision))

    if algo == algorithms.CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED:
        # NB: "If wDesc 's filter (height, width) is (5,5), data type config TRUE_HALF_CONFIG is not supported".
        # We could not check it before being in C code.
        return ndim == 2 and not is_double_config(dtype, precision)

    return False
class
CuDNNV6
(
CuDNNV51
):
...
...
@@ -162,6 +256,17 @@ class CuDNNV6(CuDNNV51):
(
'CUDNN_REDUCE_TENSOR_NORM2'
,
'norm2'
),
ctype
=
'cudnnReduceTensorOp_t'
)
def bwd_filter_algo_supports_dtype_config(self, algo, dtype, precision, ndim):
    """
    Extend the parent class answer with the rule for
    ``CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING``.

    The parent result is returned as is when it is true. Otherwise the FFT
    tiling algorithm is accepted for ``ndim == 2`` with the pseudo half,
    float or double configurations; any other algorithm keeps the parent
    result.
    """
    inherited = super(CuDNNV6, self).bwd_filter_algo_supports_dtype_config(
        algo, dtype, precision, ndim)
    if inherited:
        return inherited

    enum = self.cudnnConvolutionBwdFilterAlgo_t
    chosen = enum.fromalias(algo)
    if chosen == enum.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING:
        return ndim == 2 and (is_pseudo_half_config(dtype, precision) or
                              is_float_config(dtype, precision) or
                              is_double_config(dtype, precision))
    return inherited
class
CuDNNV7
(
CuDNNV6
):
version
=
7
...
...
theano/gpuarray/tests/check_dnn.py
浏览文件 @
1b6e6389
差异被折叠。
点击展开。
theano/gpuarray/tests/check_dnn_doc.py
deleted
100644 → 0
浏览文件 @
6cc0c5ca
差异被折叠。
点击展开。
theano/gpuarray/tests/dnn_choose_fwd.c
deleted
100644 → 0
浏览文件 @
6cc0c5ca
#section init_code_struct

/* Nothing has been selected yet: start from the default algorithm. */
prev_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
reuse_algo = 0;
if (!PARAMS->choose_once) {
  /* Clear the shapes remembered from one call to the next. */
  memset(prev_kern_dims, 0, sizeof prev_kern_dims);
  memset(prev_img_dims, 0, sizeof prev_img_dims);
}

#section support_code_struct

/* Non-zero when prev_algo can be used without running a new selection. */
int reuse_algo;
/* Algorithm selected by the last selection. */
cudnnConvolutionFwdAlgo_t prev_algo;
/* Input and kernel dimensions recorded by the last call. */
size_t prev_img_dims[5];
size_t prev_kern_dims[5];
/*
 * Select the cuDNN forward convolution algorithm to use for `input`,
 * `kerns` and `output` with convolution descriptor `desc`, and store it
 * in `*output_algo`.
 *
 * The selection is timed (params->choose_time) or heuristic, bounded by the
 * largest free GPU memory block. Unless params->choose_once is set, the
 * previous choice is reused only while the input and kernel shapes match
 * the ones recorded by the previous call.
 *
 * Returns 0 on success. On error a Python exception is set and a non-zero
 * value is returned (-1 when the temporary workspace cannot be allocated,
 * 1 otherwise).
 */
int
APPLY_SPECIFIC(choose_fwd_algo)(PyGpuArrayObject *input,
                                PyGpuArrayObject *kerns,
                                PyGpuArrayObject *output,
                                cudnnConvolutionDescriptor_t desc,
                                cudnnConvolutionFwdAlgo_t *output_algo,
                                PARAMS_TYPE *params) {
  PyGpuContextObject *c = input->context;
  cudnnStatus_t err = CUDNN_STATUS_SUCCESS;

  if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(kerns)[1]) {
    PyErr_SetString(PyExc_ValueError,
                    "images and kernel must have the same stack size");
    return 1;
  }

  if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
    return 1;
  if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
    return 1;
  if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
    return 1;

  cudnnConvolutionFwdAlgo_t algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;

  /* Every return below this point must be preceded by cuda_exit(). */
  cuda_enter(c->ctx);

  int expected_output_dims[5] = {0};
  err = cudnnGetConvolutionNdForwardOutputDim(desc, APPLY_SPECIFIC(input),
                                              APPLY_SPECIFIC(kerns),
                                              PyGpuArray_NDIM(input),
                                              expected_output_dims);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError,
                 "error computing convolution output dim: %s",
                 cudnnGetErrorString(err));
    cuda_exit(c->ctx);
    return 1;
  }

  /* The values are cast to long so that they match the %ld conversions. */
  if (PyGpuArray_NDIM(input) == 4) {
    if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
        (PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
        (PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
        (PyGpuArray_DIMS(output)[3] != expected_output_dims[3])) {
      PyErr_Format(PyExc_ValueError,
                   "impossible convolution output dim: expected %ldx%ldx%ldx%ld"
                   " but received gradient with shape %ldx%ldx%ldx%ld",
                   (long)expected_output_dims[0], (long)expected_output_dims[1],
                   (long)expected_output_dims[2], (long)expected_output_dims[3],
                   (long)PyGpuArray_DIMS(output)[0], (long)PyGpuArray_DIMS(output)[1],
                   (long)PyGpuArray_DIMS(output)[2], (long)PyGpuArray_DIMS(output)[3]);
      cuda_exit(c->ctx);
      return 1;
    }
  } else if (PyGpuArray_NDIM(input) == 5) {
    if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
        (PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
        (PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
        (PyGpuArray_DIMS(output)[3] != expected_output_dims[3]) ||
        (PyGpuArray_DIMS(output)[4] != expected_output_dims[4])) {
      PyErr_Format(PyExc_ValueError,
                   "impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
                   " but received gradient with shape %ldx%ldx%ldx%ldx%ld",
                   (long)expected_output_dims[0], (long)expected_output_dims[1],
                   (long)expected_output_dims[2], (long)expected_output_dims[3],
                   (long)expected_output_dims[4],
                   (long)PyGpuArray_DIMS(output)[0], (long)PyGpuArray_DIMS(output)[1],
                   (long)PyGpuArray_DIMS(output)[2], (long)PyGpuArray_DIMS(output)[3],
                   (long)PyGpuArray_DIMS(output)[4]);
      cuda_exit(c->ctx);
      return 1;
    }
  }

  if (!params->choose_once) {
    /* Reuse the previous choice only if no dimension changed. */
    reuse_algo = 1;
    for (unsigned int i = 0; i < PyGpuArray_NDIM(input); ++i) {
      reuse_algo = (reuse_algo &&
                    PyGpuArray_DIM(input, i) == prev_img_dims[i]);
      reuse_algo = (reuse_algo &&
                    PyGpuArray_DIM(kerns, i) == prev_kern_dims[i]);
    }
  }

  if (!reuse_algo) {
    /* Named maxfree so that it does not shadow the C library free(). */
    size_t maxfree;

    int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_LARGEST_MEMBLOCK,
                                   &maxfree);
    if (err2 != GA_NO_ERROR) {
      PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
                   "memory information on the GPU");
      cuda_exit(c->ctx);
      return 1;
    }

    // Guess 4Mb if the info is not available
    if (maxfree == 0)
      maxfree = 4 * 1024 * 1024;

    if (params->choose_time) {
      int count;
      cudnnConvolutionFwdAlgoPerf_t choice;
      gpudata *tmpmem;

      tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL);
      if (tmpmem == NULL) {
        PyErr_SetString(PyExc_MemoryError,
                        "Could not allocate working GPU memory");
        /* Leave the context entered above before bailing out. */
        cuda_exit(c->ctx);
        return -1;
      }

      err = cudnnFindConvolutionForwardAlgorithmEx(
        params->handle,
        APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
        APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
        desc,
        APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output),
        1, &count, &choice, *(void **)tmpmem, maxfree);
      gpudata_release(tmpmem);

      if (err != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError,
                     "error selecting convolution algo: %s",
                     cudnnGetErrorString(err));
        cuda_exit(c->ctx);
        return 1;
      }
      algo = choice.algo;
    } else {
      err = cudnnGetConvolutionForwardAlgorithm(
        params->handle,
        APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
        desc, APPLY_SPECIFIC(output),
        CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, maxfree, &algo);
      if (err != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError,
                     "error selecting convolution algo: %s",
                     cudnnGetErrorString(err));
        cuda_exit(c->ctx);
        return 1;
      }
    }
    prev_algo = algo;
  } else {
    algo = prev_algo;
  }

  if (params->choose_once) {
    reuse_algo = 1;
  } else {
    /* Record the shapes for the comparison made by the next call. */
    for (unsigned int i = 0; i < PyGpuArray_NDIM(input); ++i) {
      prev_img_dims[i] = PyGpuArray_DIM(input, i);
      prev_kern_dims[i] = PyGpuArray_DIM(kerns, i);
    }
  }

  /* These two algos are not supported for 3d conv */
  if (PyGpuArray_NDIM(input) == 5 &&
      (algo == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM ||
       algo == CUDNN_CONVOLUTION_FWD_ALGO_GEMM))
    algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;

  // The FFT implementation does not support strides, 1x1 filters or inputs
  // with a spatial dimension larger than 1024. The tiled-FFT implementation
  // does not support strides.
  // If the chosen implementation is FFT or tiled-FFT, validate that it can
  // be used on the current data and default to a safe implementation if it
  // can't.
  // The following code is 2d-specific but it is fine as FFT and tiled-FFT are
  // defined only for 2d filters
  if ((algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT ||
       algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) &&
      PyGpuArray_NDIM(input) == 4) {
    // Extract the properties of the convolution descriptor
    int nd;
    int pad[2];
    int stride[2];
    int dilation[2];
    cudnnConvolutionMode_t mode;
    cudnnDataType_t data_type;
    err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
                                          dilation, &mode, &data_type);
    if (err != CUDNN_STATUS_SUCCESS) {
      PyErr_Format(PyExc_RuntimeError,
                   "error getting convolution properties: %s",
                   cudnnGetErrorString(err));
      cuda_exit(c->ctx);
      return 1;
    }

    if (algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT) {
      if (stride[0] != 1 || stride[1] != 1 ||
          PyGpuArray_DIM(input, 2) > 1024 ||
          PyGpuArray_DIM(input, 3) > 1024 ||
          (PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1)) {
        algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
      }
    } else {
      // algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
      if (stride[0] != 1 || stride[1] != 1) {
        algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
      }
    }
  }

  *output_algo = algo;

  cuda_exit(c->ctx);
  return 0;
}
theano/gpuarray/tests/dnn_choose_gi.c
deleted
100644 → 0
浏览文件 @
6cc0c5ca
#section init_code_struct

/* Nothing has been selected yet: start from the default algorithm. */
prev_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
reuse_algo = 0;
if (!PARAMS->choose_once) {
  /* Clear the shapes remembered from one call to the next. */
  memset(prev_top_dims, 0, sizeof prev_top_dims);
  memset(prev_kern_dims, 0, sizeof prev_kern_dims);
}

#section support_code_struct

/* Non-zero when prev_algo can be used without running a new selection. */
int reuse_algo;
/* Algorithm selected by the last selection. */
cudnnConvolutionBwdDataAlgo_t prev_algo;
/* Kernel and output dimensions recorded by the last call. */
size_t prev_kern_dims[5];
size_t prev_top_dims[5];
/*
 * Select the cuDNN backward data convolution algorithm to use for `kerns`,
 * `output` and `input` with convolution descriptor `desc`, and store it in
 * `*output_algo`.
 *
 * The selection is timed (params->choose_time) or heuristic, bounded by the
 * largest free GPU memory block. Unless params->choose_once is set, the
 * previous choice is reused only while the kernel and output shapes match
 * the ones recorded by the previous call.
 *
 * Returns 0 on success. On error a Python exception is set and a non-zero
 * value is returned (-1 when the temporary workspace cannot be allocated,
 * 1 otherwise).
 */
int
APPLY_SPECIFIC(choose_bwd_data_algo)(PyGpuArrayObject *kerns,
                                     PyGpuArrayObject *output,
                                     PyGpuArrayObject *input,
                                     cudnnConvolutionDescriptor_t desc,
                                     cudnnConvolutionBwdDataAlgo_t *output_algo,
                                     PARAMS_TYPE *params) {
  PyGpuContextObject *c = kerns->context;
  cudnnStatus_t err = CUDNN_STATUS_SUCCESS;

  if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(kerns)[1]) {
    PyErr_SetString(PyExc_ValueError,
                    "images and kernel must have the same stack size");
    return 1;
  }

  if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
    return 1;
  if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
    return 1;
  if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
    return 1;

  cudnnConvolutionBwdDataAlgo_t algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;

  /* Every return below this point must be preceded by cuda_exit(). */
  cuda_enter(c->ctx);

  int expected_output_dims[5] = {0};
  err = cudnnGetConvolutionNdForwardOutputDim(desc, APPLY_SPECIFIC(input),
                                              APPLY_SPECIFIC(kerns),
                                              PyGpuArray_NDIM(input),
                                              expected_output_dims);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError,
                 "error computing convolution output dim: %s",
                 cudnnGetErrorString(err));
    cuda_exit(c->ctx);
    return 1;
  }

  /* The values are cast to long so that they match the %ld conversions. */
  if (PyGpuArray_NDIM(input) == 4) {
    if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
        (PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
        (PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
        (PyGpuArray_DIMS(output)[3] != expected_output_dims[3])) {
      PyErr_Format(PyExc_ValueError,
                   "impossible convolution output dim: expected %ldx%ldx%ldx%ld"
                   " but received gradient with shape %ldx%ldx%ldx%ld",
                   (long)expected_output_dims[0], (long)expected_output_dims[1],
                   (long)expected_output_dims[2], (long)expected_output_dims[3],
                   (long)PyGpuArray_DIMS(output)[0], (long)PyGpuArray_DIMS(output)[1],
                   (long)PyGpuArray_DIMS(output)[2], (long)PyGpuArray_DIMS(output)[3]);
      cuda_exit(c->ctx);
      return 1;
    }
  } else if (PyGpuArray_NDIM(input) == 5) {
    if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
        (PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
        (PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
        (PyGpuArray_DIMS(output)[3] != expected_output_dims[3]) ||
        (PyGpuArray_DIMS(output)[4] != expected_output_dims[4])) {
      PyErr_Format(PyExc_ValueError,
                   "impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
                   " but received gradient with shape %ldx%ldx%ldx%ldx%ld",
                   (long)expected_output_dims[0], (long)expected_output_dims[1],
                   (long)expected_output_dims[2], (long)expected_output_dims[3],
                   (long)expected_output_dims[4],
                   (long)PyGpuArray_DIMS(output)[0], (long)PyGpuArray_DIMS(output)[1],
                   (long)PyGpuArray_DIMS(output)[2], (long)PyGpuArray_DIMS(output)[3],
                   (long)PyGpuArray_DIMS(output)[4]);
      cuda_exit(c->ctx);
      return 1;
    }
  }

  if (!params->choose_once) {
    /* Reuse the previous choice only if no dimension changed. */
    reuse_algo = 1;
    for (unsigned int i = 0; i < PyGpuArray_NDIM(kerns); ++i) {
      reuse_algo = (reuse_algo &&
                    PyGpuArray_DIM(kerns, i) == prev_kern_dims[i]);
      reuse_algo = (reuse_algo &&
                    PyGpuArray_DIM(output, i) == prev_top_dims[i]);
    }
  }

  if (!reuse_algo) {
    /* Named maxfree so that it does not shadow the C library free(). */
    size_t maxfree;

    int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_LARGEST_MEMBLOCK,
                                   &maxfree);
    if (err2 != GA_NO_ERROR) {
      PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
                   "memory information on the GPU");
      cuda_exit(c->ctx);
      return 1;
    }

    // Guess 4Mb if the info is not available
    if (maxfree == 0)
      maxfree = 4 * 1024 * 1024;

    if (params->choose_time) {
      int count;
      cudnnConvolutionBwdDataAlgoPerf_t choice;
      gpudata *tmpmem;

      tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL);
      if (tmpmem == NULL) {
        PyErr_SetString(PyExc_MemoryError,
                        "Could not allocate working GPU memory");
        /* Leave the context entered above before bailing out. */
        cuda_exit(c->ctx);
        return -1;
      }

      err = cudnnFindConvolutionBackwardDataAlgorithmEx(
        params->handle,
        APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
        APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output),
        desc,
        APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
        1, &count, &choice, *(void **)tmpmem, maxfree);
      gpudata_release(tmpmem);

      if (err != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError,
                     "error selecting convolution algo: %s",
                     cudnnGetErrorString(err));
        cuda_exit(c->ctx);
        return 1;
      }
      algo = choice.algo;
    } else {
      err = cudnnGetConvolutionBackwardDataAlgorithm(
        params->handle,
        APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output),
        desc, APPLY_SPECIFIC(input),
        CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, maxfree, &algo);
      if (err != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError,
                     "error selecting convolution algo: %s",
                     cudnnGetErrorString(err));
        cuda_exit(c->ctx);
        return 1;
      }
    }
    prev_algo = algo;
  } else {
    algo = prev_algo;
  }

  if (params->choose_once) {
    reuse_algo = 1;
  } else {
    /* Record the shapes for the comparison made by the next call. */
    for (unsigned int i = 0; i < PyGpuArray_NDIM(kerns); ++i) {
      prev_kern_dims[i] = PyGpuArray_DIM(kerns, i);
      prev_top_dims[i] = PyGpuArray_DIM(output, i);
    }
  }

  // The FFT implementation does not support strides, 1x1 filters or inputs
  // with a spatial dimension larger than 1024. The tiled-FFT implementation
  // does not support strides.
  // If the chosen implementation is FFT or tiled-FFT, validate that it can
  // be used on the current data and default to a safe implementation if it
  // can't.
  // The following code is 2d-specific but it is fine as FFT and tiled-FFT are
  // defined only for 2d filters
  if ((algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING ||
       algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT) &&
      PyGpuArray_NDIM(kerns) == 4) {
    // Extract the properties of the convolution descriptor
    int nd;
    int pad[2];
    int stride[2];
    int upscale[2];
    cudnnConvolutionMode_t mode;
    cudnnDataType_t data_type;
    err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
                                          upscale, &mode, &data_type);
    if (err != CUDNN_STATUS_SUCCESS) {
      PyErr_Format(PyExc_RuntimeError,
                   "error getting convolution properties: %s",
                   cudnnGetErrorString(err));
      cuda_exit(c->ctx);
      return 1;
    }

    if (algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT) {
      if (stride[0] != 1 || stride[1] != 1 ||
          PyGpuArray_DIM(input, 2) > 1024 ||
          PyGpuArray_DIM(input, 3) > 1024 ||
          (PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1)) {
        algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
      }
    } else {
      // algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING
      if (stride[0] != 1 || stride[1] != 1) {
        algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
      }
    }
  }

  *output_algo = algo;

  cuda_exit(c->ctx);
  return 0;
}
theano/gpuarray/tests/dnn_choose_gw.c
deleted
100644 → 0
浏览文件 @
6cc0c5ca
#section init_code_struct

/* Nothing has been selected yet: start from the default algorithm. */
prev_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
reuse_algo = 0;
if (!PARAMS->choose_once) {
  /* Clear the shapes remembered from one call to the next. */
  memset(prev_top_dims, 0, sizeof prev_top_dims);
  memset(prev_img_dims, 0, sizeof prev_img_dims);
}

#section support_code_struct

/* Non-zero when prev_algo can be used without running a new selection. */
int reuse_algo;
/* Algorithm selected by the last selection. */
cudnnConvolutionBwdFilterAlgo_t prev_algo;
/* Input and output dimensions recorded by the last call. */
size_t prev_img_dims[5];
size_t prev_top_dims[5];
/*
 * Select the cuDNN backward filter convolution algorithm to use for
 * `input`, `output` and `kerns` with convolution descriptor `desc`, and
 * store it in `*output_algo`.
 *
 * The selection is timed (params->choose_time) or heuristic, bounded by the
 * largest free GPU memory block. Unless params->choose_once is set, the
 * previous choice is reused only while the input and output shapes match
 * the ones recorded by the previous call.
 *
 * Returns 0 on success. On error a Python exception is set and a non-zero
 * value is returned (-1 when the temporary workspace cannot be allocated,
 * 1 otherwise).
 */
int
APPLY_SPECIFIC(choose_bwd_filter_algo)(PyGpuArrayObject *input,
                                       PyGpuArrayObject *output,
                                       PyGpuArrayObject *kerns,
                                       cudnnConvolutionDescriptor_t desc,
                                       cudnnConvolutionBwdFilterAlgo_t *output_algo,
                                       PARAMS_TYPE *params) {
  PyGpuContextObject *c = input->context;
  cudnnStatus_t err = CUDNN_STATUS_SUCCESS;

  if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(kerns)[1]) {
    PyErr_SetString(PyExc_ValueError,
                    "GpuDnnConv images and kernel must have the same stack size");
    return 1;
  }

  if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
    return 1;
  if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
    return 1;
  if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
    return 1;

  cudnnConvolutionBwdFilterAlgo_t algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;

  /* Every return below this point must be preceded by cuda_exit(). */
  cuda_enter(c->ctx);

  int expected_output_dims[5] = {0};
  err = cudnnGetConvolutionNdForwardOutputDim(desc, APPLY_SPECIFIC(input),
                                              APPLY_SPECIFIC(kerns),
                                              PyGpuArray_NDIM(input),
                                              expected_output_dims);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError,
                 "error computing convolution output dim: %s",
                 cudnnGetErrorString(err));
    cuda_exit(c->ctx);
    return 1;
  }

  /* The values are cast to long so that they match the %ld conversions. */
  if (PyGpuArray_NDIM(input) == 4) {
    if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
        (PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
        (PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
        (PyGpuArray_DIMS(output)[3] != expected_output_dims[3])) {
      PyErr_Format(PyExc_ValueError,
                   "impossible convolution output dim: expected %ldx%ldx%ldx%ld"
                   " but received gradient with shape %ldx%ldx%ldx%ld",
                   (long)expected_output_dims[0], (long)expected_output_dims[1],
                   (long)expected_output_dims[2], (long)expected_output_dims[3],
                   (long)PyGpuArray_DIMS(output)[0], (long)PyGpuArray_DIMS(output)[1],
                   (long)PyGpuArray_DIMS(output)[2], (long)PyGpuArray_DIMS(output)[3]);
      cuda_exit(c->ctx);
      return 1;
    }
  } else if (PyGpuArray_NDIM(input) == 5) {
    if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
        (PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
        (PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
        (PyGpuArray_DIMS(output)[3] != expected_output_dims[3]) ||
        (PyGpuArray_DIMS(output)[4] != expected_output_dims[4])) {
      PyErr_Format(PyExc_ValueError,
                   "impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
                   " but received gradient with shape %ldx%ldx%ldx%ldx%ld",
                   (long)expected_output_dims[0], (long)expected_output_dims[1],
                   (long)expected_output_dims[2], (long)expected_output_dims[3],
                   (long)expected_output_dims[4],
                   (long)PyGpuArray_DIMS(output)[0], (long)PyGpuArray_DIMS(output)[1],
                   (long)PyGpuArray_DIMS(output)[2], (long)PyGpuArray_DIMS(output)[3],
                   (long)PyGpuArray_DIMS(output)[4]);
      cuda_exit(c->ctx);
      return 1;
    }
  }

  if (!params->choose_once) {
    /* Reuse the previous choice only if no dimension changed. */
    reuse_algo = 1;
    for (unsigned int i = 0; i < PyGpuArray_NDIM(input); ++i) {
      reuse_algo = (reuse_algo &&
                    PyGpuArray_DIM(input, i) == prev_img_dims[i]);
      reuse_algo = (reuse_algo &&
                    PyGpuArray_DIM(output, i) == prev_top_dims[i]);
    }
  }

  if (!reuse_algo) {
    /* Named maxfree so that it does not shadow the C library free(). */
    size_t maxfree;

    int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_LARGEST_MEMBLOCK,
                                   &maxfree);
    if (err2 != GA_NO_ERROR) {
      PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
                   "memory information on the GPU");
      cuda_exit(c->ctx);
      return 1;
    }

    // Guess 4Mb if the info is not available
    if (maxfree == 0)
      maxfree = 4 * 1024 * 1024;

    if (params->choose_time) {
      int count;
      cudnnConvolutionBwdFilterAlgoPerf_t choice;
      gpudata *tmpmem;

      tmpmem = gpudata_alloc(c->ctx, maxfree, NULL, 0, NULL);
      if (tmpmem == NULL) {
        PyErr_SetString(PyExc_MemoryError,
                        "Could not allocate working GPU memory");
        /* Leave the context entered above before bailing out. */
        cuda_exit(c->ctx);
        return -1;
      }

      err = cudnnFindConvolutionBackwardFilterAlgorithmEx(
        params->handle,
        APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
        APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output),
        desc,
        APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
        1, &count, &choice, *(void **)tmpmem, maxfree);
      gpudata_release(tmpmem);

      if (err != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError,
                     "error selecting convolution algo: %s",
                     cudnnGetErrorString(err));
        cuda_exit(c->ctx);
        return 1;
      }
      algo = choice.algo;
    } else {
      err = cudnnGetConvolutionBackwardFilterAlgorithm(
        params->handle,
        APPLY_SPECIFIC(input), APPLY_SPECIFIC(output),
        desc, APPLY_SPECIFIC(kerns),
        CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, maxfree, &algo);
      if (err != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError,
                     "error selecting convolution algo: %s",
                     cudnnGetErrorString(err));
        cuda_exit(c->ctx);
        return 1;
      }
    }
    prev_algo = algo;
  } else {
    algo = prev_algo;
  }

  if (params->choose_once) {
    reuse_algo = 1;
  } else {
    /* Record the shapes for the comparison made by the next call. */
    for (unsigned int i = 0; i < PyGpuArray_NDIM(input); i++) {
      prev_img_dims[i] = PyGpuArray_DIM(input, i);
      prev_top_dims[i] = PyGpuArray_DIM(output, i);
    }
  }

  // The FFT implementation does not support strides, 1x1 filters or inputs
  // with a spatial dimension larger than 1024.
  // If the chosen implementation is FFT, validate that it can
  // be used on the current data and default to a safe implementation if it
  // can't.
  // The following code is 2d-specific but it is fine as FFT and tiled-FFT are
  // defined only for 2d filters
  if (algo == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT &&
      PyGpuArray_NDIM(input) == 4) {
    // Extract the properties of the convolution descriptor
    int nd;
    int pad[2];
    int stride[2];
    int upscale[2];
    cudnnConvolutionMode_t mode;
    cudnnDataType_t data_type;
    err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
                                          upscale, &mode, &data_type);
    if (err != CUDNN_STATUS_SUCCESS) {
      PyErr_Format(PyExc_RuntimeError,
                   "error getting convolution properties: %s",
                   cudnnGetErrorString(err));
      cuda_exit(c->ctx);
      return 1;
    }

    if (stride[0] != 1 || stride[1] != 1 ||
        PyGpuArray_DIM(input, 2) > 1024 ||
        PyGpuArray_DIM(input, 3) > 1024 ||
        (PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1)) {
      algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
    }
  }

  *output_algo = algo;

  cuda_exit(c->ctx);
  return 0;
}
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论