Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
5d6ca038
提交
5d6ca038
authored
9月 10, 2015
作者:
Arnaud Bergeron
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add compat for v2 stuff.
上级
3a59bd8c
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
69 行增加
和
152 行删除
+69
-152
cudnn_helper.h
theano/sandbox/gpuarray/cudnn_helper.h
+69
-152
没有找到文件。
theano/sandbox/gpuarray/cudnn_helper.h
浏览文件 @
5d6ca038
...
@@ -4,193 +4,110 @@
...
@@ -4,193 +4,110 @@
#include <cudnn.h>
#include <cudnn.h>
#ifndef CUDNN_VERSION
#ifndef CUDNN_VERSION
#include <assert.h>
// Here we define the R2 API in terms of functions in the R1 interface
// This is only for what we use
static
inline
const
char
*
cudnnGetErrorString
(
cudnnStatus_t
err
)
{
#define CUDNN_VERSION -1
switch
(
err
)
{
static
inline
int
cudnnGetVersion
()
{
case
CUDNN_STATUS_SUCCESS
:
return
-
1
;
return
"The operation completed successfully."
;
case
CUDNN_STATUS_NOT_INITIALIZED
:
return
"The handle was not initialized(Is your driver recent enought?)."
;
case
CUDNN_STATUS_ALLOC_FAILED
:
return
"Ressource allocation failed inside the library."
;
case
CUDNN_STATUS_BAD_PARAM
:
return
"An incorrect value was passed in."
;
case
CUDNN_STATUS_ARCH_MISMATCH
:
return
"The current GPU does not support the required features (only cc 3.0+ are supported)."
;
case
CUDNN_STATUS_MAPPING_ERROR
:
return
"An access to GPU memory space failed (probably due to a failure to bind texture)."
;
case
CUDNN_STATUS_EXECUTION_FAILED
:
return
"A kernel failed to execute."
;
case
CUDNN_STATUS_INTERNAL_ERROR
:
return
"An internal cuDNN operation failed."
;
case
CUDNN_STATUS_NOT_SUPPORTED
:
return
"The combination of parameters is not currently supported."
;
default:
return
"Unknown error code."
;
}
}
}
#endif
// some macros to help support cudnn R1 while using R2 code.
#include <assert.h>
#define cudnnCreateTensorDescriptor cudnnCreateTensor4dDescriptor
#define cudnnDestroyTensorDescriptor cudnnDestroyTensor4dDescriptor
#define cudnnSetFilter4dDescriptor cudnnSetFilterDescriptor
typedef
cudnnTensor4dDescriptor_t
cudnnTensorDescriptor_t
;
static
inline
cudnnStatus_t
#if CUDNN_VERSION < 3000
cudnnGetConvolution2dForwardOutputDim
(
// Here we define the R3 API in terms of functions in the R2 interface
const
cudnnConvolutionDescriptor_t
convDesc
,
// This is only for what we use
const
cudnnTensorDescriptor_t
inputTensorDesc
,
const
cudnnFilterDescriptor_t
filterDesc
,
int
*
n
,
int
*
c
,
int
*
h
,
int
*
w
)
{
return
cudnnGetOutputTensor4dDim
(
convDesc
,
CUDNN_CONVOLUTION_FWD
,
n
,
c
,
h
,
w
);
}
typedef
int
cudnnConvolutionFwdAlgo_t
;
typedef
int
cudnnConvolutionBwdDataAlgo_t
;
typedef
int
cudnnConvolutionFwdPreference_t
;
#define CUDNN_CONVOLUTION_FWD_NO_WORKSPACE 0
#define CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 0
#define CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 1
#define CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT 2
static
inline
cudnnStatus_t
static
cudnnStatus_t
cudnnGetConvolutionBackwardDataWorkspaceSize
(
cudnnGetConvolutionForwardAlgorithm
(
cudnnHandle_t
handle
,
cudnnHandle_t
handle
,
const
cudnnTensorDescriptor_t
srcDesc
,
const
cudnnFilterDescriptor_t
filterDesc
,
const
cudnnFilterDescriptor_t
filterDesc
,
const
cudnnTensorDescriptor_t
diffDesc
,
const
cudnnConvolutionDescriptor_t
convDesc
,
const
cudnnConvolutionDescriptor_t
convDesc
,
const
cudnnTensorDescriptor_t
destDesc
,
const
cudnnTensorDescriptor_t
gradDesc
,
cudnnConvolutionFwdPreference_t
preference
,
cudnnConvolutionBwdDataAlgo_t
algo
,
size_t
memoryLimitInbytes
,
size_t
*
sizeInBytes
)
{
cudnnConvolutionFwdAlgo_t
*
algo
)
{
*
algo
=
0
;
return
CUDNN_STATUS_SUCCESS
;
}
static
inline
cudnnStatus_t
cudnnGetConvolutionForwardWorkspaceSize
(
cudnnHandle_t
handle
,
const
cudnnTensorDescriptor_t
srcDesc
,
const
cudnnFilterDescriptor_t
filterDesc
,
const
cudnnConvolutionDescriptor_t
convDesc
,
const
cudnnTensor4dDescriptor_t
destDesc
,
cudnnConvolutionFwdAlgo_t
algo
,
size_t
*
sizeInBytes
)
{
*
sizeInBytes
=
0
;
*
sizeInBytes
=
0
;
return
CUDNN_STATUS_SUCCESS
;
return
CUDNN_STATUS_SUCCESS
;
}
}
static
cudnnStatus_t
cudnnConvolutionBackwardData_v3
(
static
inline
cudnnStatus_t
cudnnConvolutionForward_v2
(
cudnnHandle_t
handle
,
cudnnHandle_t
handle
,
const
void
*
alpha
,
const
void
*
alpha
,
const
cudnnTensorDescriptor_t
srcDesc
,
const
void
*
srcData
,
const
cudnnFilterDescriptor_t
filterDesc
,
const
cudnnFilterDescriptor_t
filterDesc
,
const
void
*
filterData
,
const
void
*
filterData
,
const
cudnnTensorDescriptor_t
diffDesc
,
const
void
*
diffData
,
const
cudnnConvolutionDescriptor_t
convDesc
,
const
cudnnConvolutionDescriptor_t
convDesc
,
cudnnConvolution
Fwd
Algo_t
algo
,
cudnnConvolution
BwdData
Algo_t
algo
,
void
*
work
S
pace
,
void
*
work
s
pace
,
size_t
work
S
paceSizeInBytes
,
size_t
work
s
paceSizeInBytes
,
const
void
*
beta
,
const
void
*
beta
,
const
cudnnTensorDescriptor_t
destDesc
,
const
cudnnTensorDescriptor_t
gradDesc
,
void
*
destData
)
{
void
*
gradData
)
{
assert
(
*
(
float
*
)
alpha
==
1
.
0
);
return
cudnnConvolutionBackwardData
(
cudnnAccumulateResult_t
r
;
handle
,
if
(
*
(
float
*
)
beta
==
0
.
0
)
{
alpha
,
r
=
CUDNN_RESULT_NO_ACCUMULATE
;
filterDesc
,
}
else
if
(
*
(
float
*
)
beta
==
1
.
0
)
{
filterData
,
r
=
CUDNN_RESULT_ACCUMULATE
;
diffDesc
,
}
else
{
diffData
,
assert
(
0
&&
"beta must be 0.0 or 1.0"
);
convDesc
,
}
beta
,
return
cudnnConvolutionForward
(
handle
,
srcDesc
,
srcData
,
gradDesc
,
filterDesc
,
filterData
,
gradData
);
convDesc
,
destDesc
,
destData
,
r
);
}
}
#define cudnnConvolutionForward cudnnConvolutionForward_v2
static
inline
cudnnStatus_t
typedef
int
cudnnConvolutionBwdFilterAlgo_t
;
cudnnConvolutionBackwardFilter_v2
(
cudnnHandle_t
handle
,
#define CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 0
const
void
*
alpha
,
#define CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 1
const
cudnnTensorDescriptor_t
srcDesc
,
#define CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT 2
const
void
*
srcData
,
// Value 3 is a distinct backward-filter algorithm; it must not reuse the ALGO_FFT name, which is already defined as 2.
#define CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 3
static
cudnnStatus_t
cudnnGetConvolutionBackwardFilterWorkspaceSize
(
cudnnHandle_t
handle
,
const
cudnnTensorDescriptor_t
filterDesc
,
const
cudnnTensorDescriptor_t
diffDesc
,
const
cudnnTensorDescriptor_t
diffDesc
,
const
void
*
diffData
,
const
cudnnConvolutionDescriptor_t
convDesc
,
const
cudnnConvolutionDescriptor_t
convDesc
,
const
void
*
beta
,
const
cudnnFilterDescriptor_t
gradDesc
,
const
cudnnFilterDescriptor_t
gradDesc
,
void
*
gradData
)
{
cudnnConvolutionBwdDataAlgo_t
algo
,
assert
(
*
(
float
*
)
alpha
==
1
.
0
);
size_t
*
sizeInBytes
)
{
cudnnAccumulateResult_t
r
;
*
sizeInBytes
=
0
;
if
(
*
(
float
*
)
beta
==
0
.
0
)
{
return
CUDNN_STATUS_SUCCESS
;
r
=
CUDNN_RESULT_NO_ACCUMULATE
;
}
else
if
(
*
(
float
*
)
beta
==
1
.
0
)
{
r
=
CUDNN_RESULT_ACCUMULATE
;
}
else
{
assert
(
0
&&
"beta must be 0.0 or 1.0"
);
}
return
cudnnConvolutionBackwardFilter
(
handle
,
srcDesc
,
srcData
,
diffDesc
,
diffData
,
convDesc
,
gradDesc
,
gradData
,
r
);
}
}
#define cudnnConvolutionBackwardFilter cudnnConvolutionBackwardFilter_v2
static
cudnnStatus_t
cudnnConvolutionBackwardFilter_v3
(
cudnnHandle_t
handle
,
static
inline
cudnnStatus_t
cudnnConvolutionBackwardData_v2
(
cudnnHandle_t
handle
,
const
void
*
alpha
,
const
void
*
alpha
,
const
cudnn
FilterDescriptor_t
filter
Desc
,
const
cudnn
TensorDescriptor_t
src
Desc
,
const
void
*
filter
Data
,
const
void
*
src
Data
,
const
cudnnTensorDescriptor_t
diffDesc
,
const
cudnnTensorDescriptor_t
diffDesc
,
const
void
*
diffData
,
const
void
*
diffData
,
const
cudnnConvolutionDescriptor_t
convDesc
,
const
cudnnConvolutionDescriptor_t
convDesc
,
cudnnConvolutionBwdFilterAlgo_t
algo
,
void
*
workspace
,
size_t
workspaceSizeInBytes
,
const
void
*
beta
,
const
void
*
beta
,
const
cudnn
Tenso
rDescriptor_t
gradDesc
,
const
cudnn
Filte
rDescriptor_t
gradDesc
,
void
*
gradData
)
{
void
*
gradData
)
{
assert
(
*
(
float
*
)
alpha
==
1
.
0
);
return
cudnnConvolutionBackwardFilter
(
cudnnAccumulateResult_t
r
;
handle
,
if
(
*
(
float
*
)
beta
==
0
.
0
)
{
alpha
,
r
=
CUDNN_RESULT_NO_ACCUMULATE
;
srcDesc
,
}
else
if
(
*
(
float
*
)
beta
==
1
.
0
)
{
srcData
,
r
=
CUDNN_RESULT_ACCUMULATE
;
diffDesc
,
}
else
{
diffData
,
assert
(
0
&&
"beta must be 0.0 or 1.0"
);
convDesc
,
}
beta
,
/* This function needs the casting because its params are not
gradDesc
,
declared as const */
gradData
);
return
cudnnConvolutionBackwardData
(
handle
,
(
cudnnFilterDescriptor_t
)
filterDesc
,
filterData
,
(
cudnnTensorDescriptor_t
)
diffDesc
,
diffData
,
(
cudnnConvolutionDescriptor_t
)
convDesc
,
(
cudnnTensorDescriptor_t
)
gradDesc
,
gradData
,
r
);
}
}
#define cudnnConvolutionBackwardData cudnnConvolutionBackwardData_v2
//Needed for R2 rc2
# define CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING CUDNN_POOLING_AVERAGE
#else
// r2 rc1 and rc2 do not have the same macro defined
// I didn't check whether this is the right combination, but since we do not wrap the padding interface, it is fine for now.
# define CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING ((cudnnPoolingMode_t)1)
#endif
#endif
#endif
#endif
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论