Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
c5944c7d
提交
c5944c7d
authored
8月 20, 2017
作者:
Boris Fomitchev
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fixed overflow with workspace size, tensor_op setting for FindEx
上级
e596e80e
隐藏空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
64 行增加
和
72 行删除
+64
-72
dnn_conv_base.c
theano/gpuarray/c_code/dnn_conv_base.c
+19
-5
dnn_fwd.c
theano/gpuarray/c_code/dnn_fwd.c
+14
-22
dnn_gi.c
theano/gpuarray/c_code/dnn_gi.c
+15
-23
dnn_gw.c
theano/gpuarray/c_code/dnn_gw.c
+16
-22
没有找到文件。
theano/gpuarray/c_code/dnn_conv_base.c
浏览文件 @
c5944c7d
...
@@ -26,6 +26,20 @@ static int c_check_groups_for_conv(cudnnConvolutionDescriptor_t desc, int groups
...
@@ -26,6 +26,20 @@ static int c_check_groups_for_conv(cudnnConvolutionDescriptor_t desc, int groups
#endif
#endif
}
}
static
int
c_set_math_type_for_conv
(
cudnnConvolutionDescriptor_t
desc
,
cudnnMathType_t
mathtype
)
{
#if CUDNN_MAJOR >= 7
// CUDNN7: need to set math type
cudnnStatus_t
err
=
cudnnSetConvolutionMathType
(
desc
,
mathtype
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error setting math type for convolution : %s"
,
cudnnGetErrorString
(
err
));
return
-
1
;
}
#endif
return
1
;
}
#section init_code_struct
#section init_code_struct
cudnnStatus_t
APPLY_SPECIFIC
(
err
);
cudnnStatus_t
APPLY_SPECIFIC
(
err
);
...
@@ -83,19 +97,19 @@ static cudnnStatus_t checkCudnnStatus(cudnnStatus_t err, const char* msg)
...
@@ -83,19 +97,19 @@ static cudnnStatus_t checkCudnnStatus(cudnnStatus_t err, const char* msg)
return
err
;
return
err
;
}
}
static
in
t
static
size_
t
c_get_largest_free_block_size
(
PyGpuContextObject
*
c
)
c_get_largest_free_block_size
(
PyGpuContextObject
*
c
)
{
{
size_t
free
=
0
;
size_t
max
free
=
0
;
int
err2
=
gpucontext_property
(
c
->
ctx
,
GA_CTX_PROP_LARGEST_MEMBLOCK
,
&
free
);
int
err2
=
gpucontext_property
(
c
->
ctx
,
GA_CTX_PROP_LARGEST_MEMBLOCK
,
&
max
free
);
if
(
err2
!=
GA_NO_ERROR
)
{
if
(
err2
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
PyErr_Format
(
PyExc_RuntimeError
,
"Error when trying to find the "
"memory information on the GPU"
);
"memory information on the GPU"
);
}
}
// Guess 4Mb if the info is not available
// Guess 4Mb if the info is not available
if
(
free
==
0
)
free
=
4
*
1024
*
1024
;
if
(
maxfree
==
0
)
max
free
=
4
*
1024
*
1024
;
return
free
;
return
max
free
;
}
}
/** Check if convolution output tensor has expected dimensions
/** Check if convolution output tensor has expected dimensions
...
...
theano/gpuarray/c_code/dnn_fwd.c
浏览文件 @
c5944c7d
...
@@ -186,11 +186,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -186,11 +186,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
std
::
string
hashkey
;
std
::
string
hashkey
;
size_t
free
=
c_get_largest_free_block_size
(
c
);
if
(
PyErr_Occurred
())
return
1
;
cuda_enter
(
c
->
ctx
);
cuda_enter
(
c
->
ctx
);
size_t
maxfree
=
c_get_largest_free_block_size
(
c
);
if
(
PyErr_Occurred
())
return
1
;
if
(
params
->
choose_algo
)
{
if
(
params
->
choose_algo
)
{
if
(
!
reuse_algo
)
{
if
(
!
reuse_algo
)
{
...
@@ -220,12 +221,14 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -220,12 +221,14 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
cudnnConvolutionFwdAlgoPerf_t
choice
;
cudnnConvolutionFwdAlgoPerf_t
choice
;
gpudata
*
tmpmem
;
gpudata
*
tmpmem
;
tmpmem
=
gpudata_alloc
(
c
->
ctx
,
free
,
NULL
,
0
,
NULL
);
tmpmem
=
gpudata_alloc
(
c
->
ctx
,
max
free
,
NULL
,
0
,
NULL
);
if
(
tmpmem
==
NULL
)
{
if
(
tmpmem
==
NULL
)
{
PyErr_SetString
(
PyExc_MemoryError
,
"Could not allocate
working GPU memory
"
);
PyErr_SetString
(
PyExc_MemoryError
,
"Could not allocate
GPU memory for FindEx
"
);
cuda_exit
(
c
->
ctx
);
cuda_exit
(
c
->
ctx
);
return
-
1
;
return
-
1
;
}
}
// set the 'tensor math ok' flag
c_set_math_type_for_conv
(
desc
,
CUDNN_TENSOR_OP_MATH
);
// We don't sync the buffer as we don't care about the values.
// We don't sync the buffer as we don't care about the values.
err
=
cudnnFindConvolutionForwardAlgorithmEx
(
err
=
cudnnFindConvolutionForwardAlgorithmEx
(
...
@@ -233,7 +236,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -233,7 +236,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
desc
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
*
output
),
desc
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
*
output
),
1
,
&
count
,
&
choice
,
*
(
void
**
)
tmpmem
,
1
,
&
count
,
&
choice
,
*
(
void
**
)
tmpmem
,
free
);
max
free
);
gpudata_release
(
tmpmem
);
gpudata_release
(
tmpmem
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -269,7 +272,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -269,7 +272,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
err
=
cudnnGetConvolutionForwardAlgorithm
(
err
=
cudnnGetConvolutionForwardAlgorithm
(
params
->
handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
params
->
handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
kerns
),
desc
,
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
output
),
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
,
free
,
&
algo
);
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
,
max
free
,
&
algo
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"error selecting convolution algo: %s"
,
"error selecting convolution algo: %s"
,
...
@@ -283,8 +286,9 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -283,8 +286,9 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
}
}
}
}
}
}
if
(
dnn_conv_fwd_fallback
(
&
algo
,
input
,
kerns
,
desc
)
!=
0
)
{
if
(
c_set_math_type_for_conv
(
desc
,
mathtype
)
==
-
1
||
dnn_conv_fwd_fallback
(
&
algo
,
input
,
kerns
,
desc
)
!=
0
)
{
cuda_exit
(
c
->
ctx
);
cuda_exit
(
c
->
ctx
);
return
1
;
return
1
;
}
}
...
@@ -343,12 +347,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -343,12 +347,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
cuda_exit
(
c
->
ctx
);
cuda_exit
(
c
->
ctx
);
return
1
;
return
1
;
}
}
fprintf
(
stderr
,
"(using %s
%s
%s%s%s, ws:%ld, hash:%s)
\n
"
,
fprintf
(
stderr
,
"(using %s
%s
%s%s%s, ws:%ld, hash:%s)
\n
"
,
algorithm_name
,
algorithm_name
,
mathtype
==
CUDNN_TENSOR_OP_MATH
?
"[T]"
:
""
,
params
->
choose_time
?
"(timed)"
:
""
,
params
->
choose_time
?
"(timed)"
:
""
,
reuse_algo
?
"(reused)"
:
""
,
reuse_algo
?
"(reused)"
:
""
,
use_cached
?
"(cache)"
:
""
,
use_cached
?
"(cache)"
:
""
,
mathtype
==
CUDNN_TENSOR_OP_MATH
?
"(tensor op)"
:
""
,
worksize
,
worksize
,
hashkey
.
c_str
()
hashkey
.
c_str
()
);
);
...
@@ -361,18 +365,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -361,18 +365,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
{
{
gpudata
*
workspace
=
0
;
gpudata
*
workspace
=
0
;
#if CUDNN_MAJOR >= 7
// CUDNN7: need to set math type
err
=
cudnnSetConvolutionMathType
(
desc
,
mathtype
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error setting math type for convolution : %s"
,
cudnnGetErrorString
(
err
));
cuda_exit
(
c
->
ctx
);
return
1
;
}
#endif
/*
/*
* This is less than ideal since we need to free it after (which
* This is less than ideal since we need to free it after (which
* introduces a synchronization point. But we don't have a module
* introduces a synchronization point. But we don't have a module
...
...
theano/gpuarray/c_code/dnn_gi.c
浏览文件 @
c5944c7d
...
@@ -156,7 +156,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -156,7 +156,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
std
::
string
hashkey
;
std
::
string
hashkey
;
size_t
free
=
c_get_largest_free_block_size
(
c
);
size_t
max
free
=
c_get_largest_free_block_size
(
c
);
if
(
PyErr_Occurred
())
return
1
;
if
(
PyErr_Occurred
())
return
1
;
cuda_enter
(
c
->
ctx
);
cuda_enter
(
c
->
ctx
);
...
@@ -190,7 +190,10 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -190,7 +190,10 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cudnnConvolutionBwdDataAlgoPerf_t
choice
;
cudnnConvolutionBwdDataAlgoPerf_t
choice
;
gpudata
*
tmpmem
;
gpudata
*
tmpmem
;
tmpmem
=
gpudata_alloc
(
c
->
ctx
,
free
,
NULL
,
0
,
NULL
);
// set the 'tensor math ok' flag
c_set_math_type_for_conv
(
desc
,
CUDNN_TENSOR_OP_MATH
);
tmpmem
=
gpudata_alloc
(
c
->
ctx
,
maxfree
,
NULL
,
0
,
NULL
);
if
(
tmpmem
==
NULL
)
{
if
(
tmpmem
==
NULL
)
{
PyErr_SetString
(
PyExc_MemoryError
,
"Could not allocate working GPU memory"
);
PyErr_SetString
(
PyExc_MemoryError
,
"Could not allocate working GPU memory"
);
cuda_exit
(
c
->
ctx
);
cuda_exit
(
c
->
ctx
);
...
@@ -201,7 +204,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -201,7 +204,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
params
->
handle
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
params
->
handle
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
desc
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
desc
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
*
input
),
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
*
input
),
1
,
&
count
,
&
choice
,
*
(
void
**
)
tmpmem
,
free
);
1
,
&
count
,
&
choice
,
*
(
void
**
)
tmpmem
,
max
free
);
gpudata_release
(
tmpmem
);
gpudata_release
(
tmpmem
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -235,7 +238,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -235,7 +238,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
err
=
cudnnGetConvolutionBackwardDataAlgorithm
(
err
=
cudnnGetConvolutionBackwardDataAlgorithm
(
params
->
handle
,
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
output
),
params
->
handle
,
APPLY_SPECIFIC
(
kerns
),
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
input
),
desc
,
APPLY_SPECIFIC
(
input
),
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
,
free
,
&
algo
);
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
,
max
free
,
&
algo
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error selecting convolution algo: %s"
,
PyErr_Format
(
PyExc_RuntimeError
,
"error selecting convolution algo: %s"
,
cudnnGetErrorString
(
err
));
cudnnGetErrorString
(
err
));
...
@@ -248,8 +251,9 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -248,8 +251,9 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
}
}
}
}
}
}
if
(
dnn_conv_gi_fallback
(
&
algo
,
*
input
,
kerns
,
desc
)
!=
0
)
{
if
(
c_set_math_type_for_conv
(
desc
,
mathtype
)
==
-
1
||
dnn_conv_gi_fallback
(
&
algo
,
*
input
,
kerns
,
desc
)
!=
0
)
{
cuda_exit
(
c
->
ctx
);
cuda_exit
(
c
->
ctx
);
return
1
;
return
1
;
}
}
...
@@ -313,23 +317,11 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -313,23 +317,11 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
}
}
#endif
#endif
if
(
params
->
choose_once
)
{
if
(
params
->
choose_once
)
{
reuse_algo
=
1
;
reuse_algo
=
1
;
}
}
gpudata
*
workspace
=
0
;
gpudata
*
workspace
=
0
;
#if CUDNN_MAJOR >= 7
// CUDNN7: need to set math type
err
=
cudnnSetConvolutionMathType
(
desc
,
mathtype
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error setting math type for convolution : %s"
,
cudnnGetErrorString
(
err
));
cuda_exit
(
c
->
ctx
);
return
1
;
}
#endif
if
(
worksize
!=
0
)
{
if
(
worksize
!=
0
)
{
workspace
=
gpudata_alloc
(
c
->
ctx
,
worksize
,
NULL
,
0
,
NULL
);
workspace
=
gpudata_alloc
(
c
->
ctx
,
worksize
,
NULL
,
0
,
NULL
);
if
(
workspace
==
NULL
)
{
if
(
workspace
==
NULL
)
{
...
...
theano/gpuarray/c_code/dnn_gw.c
浏览文件 @
c5944c7d
...
@@ -143,7 +143,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -143,7 +143,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
std
::
string
hashkey
;
std
::
string
hashkey
;
size_t
free
=
c_get_largest_free_block_size
(
c
);
size_t
max
free
=
c_get_largest_free_block_size
(
c
);
if
(
PyErr_Occurred
())
return
1
;
if
(
PyErr_Occurred
())
return
1
;
cuda_enter
(
c
->
ctx
);
cuda_enter
(
c
->
ctx
);
...
@@ -176,8 +176,11 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -176,8 +176,11 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
int
count
;
int
count
;
cudnnConvolutionBwdFilterAlgoPerf_t
choice
;
cudnnConvolutionBwdFilterAlgoPerf_t
choice
;
gpudata
*
tmpmem
;
gpudata
*
tmpmem
;
// set the 'tensor math ok' flag
c_set_math_type_for_conv
(
desc
,
CUDNN_TENSOR_OP_MATH
);
tmpmem
=
gpudata_alloc
(
c
->
ctx
,
free
,
NULL
,
0
,
NULL
);
tmpmem
=
gpudata_alloc
(
c
->
ctx
,
max
free
,
NULL
,
0
,
NULL
);
if
(
tmpmem
==
NULL
)
{
if
(
tmpmem
==
NULL
)
{
PyErr_SetString
(
PyExc_MemoryError
,
"Could not allocate working GPU memory"
);
PyErr_SetString
(
PyExc_MemoryError
,
"Could not allocate working GPU memory"
);
cuda_exit
(
c
->
ctx
);
cuda_exit
(
c
->
ctx
);
...
@@ -188,7 +191,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -188,7 +191,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
params
->
handle
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
params
->
handle
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
desc
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
desc
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
*
kerns
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
*
kerns
),
1
,
&
count
,
&
choice
,
*
(
void
**
)
tmpmem
,
free
);
1
,
&
count
,
&
choice
,
*
(
void
**
)
tmpmem
,
max
free
);
gpudata_release
(
tmpmem
);
gpudata_release
(
tmpmem
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -224,7 +227,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -224,7 +227,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
err
=
cudnnGetConvolutionBackwardFilterAlgorithm
(
err
=
cudnnGetConvolutionBackwardFilterAlgorithm
(
params
->
handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
output
),
params
->
handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
output
),
desc
,
APPLY_SPECIFIC
(
kerns
),
desc
,
APPLY_SPECIFIC
(
kerns
),
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
,
free
,
&
algo
);
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
,
max
free
,
&
algo
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"error selecting convolution algo: %s"
,
"error selecting convolution algo: %s"
,
...
@@ -238,8 +241,9 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -238,8 +241,9 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
}
}
}
}
}
/* choose_algo */
}
/* choose_algo */
if
(
dnn_conv_gw_fallback
(
&
algo
,
input
,
*
kerns
,
desc
)
!=
0
)
{
if
(
c_set_math_type_for_conv
(
desc
,
mathtype
)
==
-
1
||
dnn_conv_gw_fallback
(
&
algo
,
input
,
*
kerns
,
desc
)
!=
0
)
{
cuda_exit
(
c
->
ctx
);
cuda_exit
(
c
->
ctx
);
return
1
;
return
1
;
}
}
...
@@ -303,22 +307,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -303,22 +307,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
}
}
#endif
#endif
if
(
params
->
choose_once
)
{
if
(
params
->
choose_once
)
{
reuse_algo
=
1
;
reuse_algo
=
1
;
}
}
gpudata
*
workspace
=
0
;
gpudata
*
workspace
=
0
;
#if CUDNN_MAJOR >= 7
// CUDNN7: need to set math type
err
=
cudnnSetConvolutionMathType
(
desc
,
mathtype
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"error setting math type for convolution : %s"
,
cudnnGetErrorString
(
err
));
cuda_exit
(
c
->
ctx
);
return
1
;
}
#endif
if
(
worksize
!=
0
)
{
if
(
worksize
!=
0
)
{
workspace
=
gpudata_alloc
(
c
->
ctx
,
worksize
,
NULL
,
0
,
NULL
);
workspace
=
gpudata_alloc
(
c
->
ctx
,
worksize
,
NULL
,
0
,
NULL
);
if
(
workspace
==
NULL
)
{
if
(
workspace
==
NULL
)
{
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论