Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
a9a105f6
提交
a9a105f6
authored
7月 28, 2017
作者:
Pascal Lamblin
提交者:
GitHub
7月 28, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #6166 from notoraptor/fixes-and-debug-messages
Add DEBUG messages into cuDNN conv codes.
上级
173f72bf
00466269
隐藏空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
122 行增加
和
56 行删除
+122
-56
op.py
theano/gof/op.py
+1
-1
dnn_fwd.c
theano/gpuarray/dnn_fwd.c
+58
-19
dnn_gi.c
theano/gpuarray/dnn_gi.c
+33
-19
dnn_gw.c
theano/gpuarray/dnn_gw.c
+30
-17
没有找到文件。
theano/gof/op.py
浏览文件 @
a9a105f6
...
...
@@ -442,7 +442,7 @@ class CLinkerOp(CLinkerObject):
The subclass does not override this method.
"""
raise
utils
.
MethodNotDefined
(
"c_init_code_
apply
"
,
type
(
self
),
raise
utils
.
MethodNotDefined
(
"c_init_code_
struct
"
,
type
(
self
),
self
.
__class__
.
__name__
)
def
c_support_code_struct
(
self
,
node
,
name
):
...
...
theano/gpuarray/dnn_fwd.c
浏览文件 @
a9a105f6
#section init_code_struct
if
(
PARAMS
->
choose_algo
)
{
reuse_algo
=
0
;
prev_algo
=
PARAMS
->
conv_algo
;
if
(
!
PARAMS
->
choose_once
)
{
memset
(
prev_img_dims
,
0
,
sizeof
(
prev_img_dims
));
memset
(
prev_kern_dims
,
0
,
sizeof
(
prev_kern_dims
));
}
}
reuse_algo
=
0
;
prev_algo
=
PARAMS
->
conv_algo
;
memset
(
prev_img_dims
,
0
,
sizeof
(
prev_img_dims
));
memset
(
prev_kern_dims
,
0
,
sizeof
(
prev_kern_dims
));
#section support_code_struct
...
...
@@ -83,6 +79,9 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
size_t
output_offset
=
PyGpuArray_STRIDE
(
*
output
,
0
)
/
params
->
num_groups
;
cudnnConvolutionFwdAlgo_t
algo
=
params
->
conv_algo
;
#ifdef DEBUG
char
algorithm_name
[
128
];
#endif
cuda_enter
(
c
->
ctx
);
...
...
@@ -138,6 +137,19 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return
1
;
}
algo
=
choice
.
algo
;
#ifdef DEBUG
if
(
count
==
0
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
"No best-timed conv fwd algorithm found"
);
return
1
;
}
else
if
(
choice
.
status
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error getting best-timed FWD algo: %s"
,
cudnnGetErrorString
(
choice
.
status
));
return
1
;
}
// Else, count is necessarily 1 for the current implementation.
#endif
}
else
{
err
=
cudnnGetConvolutionForwardAlgorithm
(
params
->
handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
...
...
@@ -156,6 +168,16 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
algo
=
prev_algo
;
}
#ifdef DEBUG
if
(
0
!=
theano_enum_to_string_cudnnConvolutionFwdAlgo_t
(
algo
,
algorithm_name
))
return
1
;
// NB: This is printed only when algorithm is chosen at runtime.
if
(
reuse_algo
)
fprintf
(
stderr
,
"(reused %s)
\n
"
,
algorithm_name
);
else
fprintf
(
stderr
,
"(using %s)
\n
"
,
algorithm_name
);
#endif
if
(
params
->
choose_once
)
{
reuse_algo
=
1
;
}
else
{
...
...
@@ -164,15 +186,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
prev_kern_dims
[
i
]
=
PyGpuArray_DIM
(
kerns
,
i
);
}
}
#ifdef DEBUG
char
algorithm_name
[
128
];
if
(
0
!=
theano_enum_to_string_cudnnConvolutionFwdAlgo_t
(
algo
,
algorithm_name
))
{
return
1
;
};
// NB: This is printed only when algorithm is chosen at runtime.
fprintf
(
stderr
,
"(using %s) "
,
algorithm_name
);
#endif
}
/* Only these algos are supported for 3d conv with cuDNN >= V5.1. */
...
...
@@ -180,14 +193,27 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
!
(
algo
==
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
||
algo
==
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
||
algo
==
CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
))
{
#ifdef DEBUG
if
(
0
!=
theano_enum_to_string_cudnnConvolutionFwdAlgo_t
(
algo
,
algorithm_name
))
return
1
;
fprintf
(
stderr
,
"(%s unsupported for 3D: fallback to CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM)
\n
"
,
algorithm_name
);
#endif
algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
;
}
// Algo `small` does not work for a batch size > 2^16, with cuDNN >= V5.1.
// Issue should be resolved for cuDNN > V6.0.
if
(
cudnnGetVersion
()
<
6100
&&
algo
==
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
&&
PyGpuArray_DIM
(
input
,
0
)
>
65536
)
algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
;
{
#ifdef DEBUG
fprintf
(
stderr
,
"(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM "
"will fail with batch size > 2^16, fallback to CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM)
\n
"
);
#endif
algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
;
}
// The FFT implementation does not support strides, 1x1 filters or inputs
// with a spatial dimension larger than 1024. The tiled-FFT implementation
...
...
@@ -197,6 +223,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
// can't.
// The following code is 2d-specific but it is fine as FFT and tiled-FFT are
// defined only for 2d filters
/* NB:
TODO: These checks seem outdated for FFT algorithms with cuDNN >= 5.1.
New conditions apply and may depend on number of dimensions (2D or 3D)
e.g. for FFT_TILING.
TODO: More globally, how to handle CUDNN_STATUS_NOT_SUPPORTED with unsupported algorithms?
*/
if
((
algo
==
CUDNN_CONVOLUTION_FWD_ALGO_FFT
||
algo
==
CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
)
&&
PyGpuArray_NDIM
(
input
)
==
4
)
{
...
...
@@ -245,7 +277,14 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
if
(
err
==
CUDNN_STATUS_NOT_SUPPORTED
)
{
// Fallback to none algo if not supported
// TODO: Print a warning
#ifdef DEBUG
if
(
0
!=
theano_enum_to_string_cudnnConvolutionFwdAlgo_t
(
algo
,
algorithm_name
))
return
1
;
fprintf
(
stderr
,
"(%s error getting worksize: "
"fallback to CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM)
\n
"
,
algorithm_name
);
#endif
algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM
;
err
=
cudnnGetConvolutionForwardWorkspaceSize
(
params
->
handle
,
...
...
theano/gpuarray/dnn_gi.c
浏览文件 @
a9a105f6
#section init_code_struct
if
(
PARAMS
->
choose_algo
)
{
reuse_algo
=
0
;
prev_algo
=
PARAMS
->
conv_algo
;
if
(
!
PARAMS
->
choose_once
)
{
memset
(
prev_kern_dims
,
0
,
sizeof
(
prev_kern_dims
));
memset
(
prev_top_dims
,
0
,
sizeof
(
prev_top_dims
));
}
}
reuse_algo
=
0
;
prev_algo
=
PARAMS
->
conv_algo
;
memset
(
prev_kern_dims
,
0
,
sizeof
(
prev_kern_dims
));
memset
(
prev_top_dims
,
0
,
sizeof
(
prev_top_dims
));
#section support_code_struct
int
reuse_algo
;
cudnnConvolutionBwdDataAlgo_t
prev_algo
;
size_t
prev_kern_dims
[
5
]
=
{
0
}
;
size_t
prev_top_dims
[
5
]
=
{
0
}
;
size_t
prev_kern_dims
[
5
];
size_t
prev_top_dims
[
5
];
int
APPLY_SPECIFIC
(
conv_gi
)(
PyGpuArrayObject
*
kerns
,
PyGpuArrayObject
*
output
,
...
...
@@ -82,6 +78,9 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
size_t
output_offset
=
PyGpuArray_STRIDE
(
output
,
0
)
/
params
->
num_groups
;
cudnnConvolutionBwdDataAlgo_t
algo
=
params
->
conv_algo
;
#ifdef DEBUG
char
algorithm_name
[
128
];
#endif
cuda_enter
(
c
->
ctx
);
...
...
@@ -178,6 +177,19 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
}
algo
=
choice
.
algo
;
#ifdef DEBUG
if
(
count
==
0
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
"No best-timed conv gradinput algorithm found"
);
return
1
;
}
else
if
(
choice
.
status
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error getting best-timed gradinput algo: %s"
,
cudnnGetErrorString
(
choice
.
status
));
return
1
;
}
// Else, count is necessarily 1 for the current implementation.
#endif
}
else
{
err
=
cudnnGetConvolutionBackwardDataAlgorithm
(
params
->
handle
,
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
output
),
...
...
@@ -195,6 +207,17 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
algo
=
prev_algo
;
}
#ifdef DEBUG
char
algorithm_name
[
128
];
if
(
0
!=
theano_enum_to_string_cudnnConvolutionBwdDataAlgo_t
(
algo
,
algorithm_name
))
return
1
;
// NB: This is printed only when algorithm is chosen at runtime.
if
(
reuse_algo
)
fprintf
(
stderr
,
"(reused %s)
\n
"
,
algorithm_name
);
else
fprintf
(
stderr
,
"(using %s)
\n
"
,
algorithm_name
);
#endif
if
(
params
->
choose_once
)
{
reuse_algo
=
1
;
}
else
{
...
...
@@ -203,15 +226,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
prev_top_dims
[
i
]
=
PyGpuArray_DIM
(
output
,
i
);
}
}
#ifdef DEBUG
char
algorithm_name
[
128
];
if
(
0
!=
theano_enum_to_string_cudnnConvolutionBwdDataAlgo_t
(
algo
,
algorithm_name
))
{
return
1
;
};
// NB: This is printed only when algorithm is chosen at runtime.
fprintf
(
stderr
,
"(using %s) "
,
algorithm_name
);
#endif
}
// The FFT implementation does not support strides, 1x1 filters or inputs
...
...
theano/gpuarray/dnn_gw.c
浏览文件 @
a9a105f6
#section init_code_struct
if
(
PARAMS
->
choose_algo
)
{
reuse_algo
=
0
;
prev_algo
=
PARAMS
->
conv_algo
;
if
(
!
PARAMS
->
choose_once
)
{
memset
(
prev_img_dims
,
0
,
sizeof
(
prev_img_dims
));
memset
(
prev_top_dims
,
0
,
sizeof
(
prev_top_dims
));
}
}
reuse_algo
=
0
;
prev_algo
=
PARAMS
->
conv_algo
;
memset
(
prev_img_dims
,
0
,
sizeof
(
prev_img_dims
));
memset
(
prev_top_dims
,
0
,
sizeof
(
prev_top_dims
));
#section support_code_struct
...
...
@@ -83,6 +79,9 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
size_t
output_offset
=
PyGpuArray_STRIDE
(
output
,
0
)
/
params
->
num_groups
;
cudnnConvolutionBwdFilterAlgo_t
algo
=
params
->
conv_algo
;
#ifdef DEBUG
char
algorithm_name
[
128
];
#endif
cuda_enter
(
c
->
ctx
);
...
...
@@ -180,6 +179,19 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
}
algo
=
choice
.
algo
;
#ifdef DEBUG
if
(
count
==
0
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
"No best-timed conv gradweight algorithm found"
);
return
1
;
}
else
if
(
choice
.
status
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error getting best-timed gradweight algo: %s"
,
cudnnGetErrorString
(
choice
.
status
));
return
1
;
}
// Else, count is necessarily 1 for the current implementation.
#endif
}
else
{
err
=
cudnnGetConvolutionBackwardFilterAlgorithm
(
params
->
handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
output
),
...
...
@@ -198,6 +210,16 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
algo
=
prev_algo
;
}
#ifdef DEBUG
if
(
0
!=
theano_enum_to_string_cudnnConvolutionBwdFilterAlgo_t
(
algo
,
algorithm_name
))
return
1
;
// NB: This is printed only when algorithm is chosen at runtime.
if
(
reuse_algo
)
fprintf
(
stderr
,
"(reused %s)
\n
"
,
algorithm_name
);
else
fprintf
(
stderr
,
"(using %s)
\n
"
,
algorithm_name
);
#endif
if
(
params
->
choose_once
)
{
reuse_algo
=
1
;
}
else
{
...
...
@@ -206,15 +228,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
prev_top_dims
[
i
]
=
PyGpuArray_DIM
(
output
,
i
);
}
}
#ifdef DEBUG
char
algorithm_name
[
128
];
if
(
0
!=
theano_enum_to_string_cudnnConvolutionBwdFilterAlgo_t
(
algo
,
algorithm_name
))
{
return
1
;
};
// NB: This is printed only when algorithm is chosen at runtime.
fprintf
(
stderr
,
"(using %s) "
,
algorithm_name
);
#endif
}
// The FFT implementation does not support strides, 1x1 filters or inputs
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论