Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
5d6ca038
提交
5d6ca038
authored
9月 10, 2015
作者:
Arnaud Bergeron
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add compat for v2 stuff.
上级
3a59bd8c
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
69 行增加
和
152 行删除
+69
-152
cudnn_helper.h
theano/sandbox/gpuarray/cudnn_helper.h
+69
-152
没有找到文件。
theano/sandbox/gpuarray/cudnn_helper.h
浏览文件 @
5d6ca038
...
...
@@ -4,193 +4,110 @@
#include <cudnn.h>
#ifndef CUDNN_VERSION
#include <assert.h>
// Here we define the R2 API in terms of functions in the R1 interface
// This is only for what we use
static
inline
const
char
*
cudnnGetErrorString
(
cudnnStatus_t
err
)
{
switch
(
err
)
{
case
CUDNN_STATUS_SUCCESS
:
return
"The operation completed successfully."
;
case
CUDNN_STATUS_NOT_INITIALIZED
:
return
"The handle was not initialized(Is your driver recent enought?)."
;
case
CUDNN_STATUS_ALLOC_FAILED
:
return
"Ressource allocation failed inside the library."
;
case
CUDNN_STATUS_BAD_PARAM
:
return
"An incorrect value was passed in."
;
case
CUDNN_STATUS_ARCH_MISMATCH
:
return
"The current GPU does not support the required features (only cc 3.0+ are supported)."
;
case
CUDNN_STATUS_MAPPING_ERROR
:
return
"An access to GPU memory space failed (probably due to a failure to bind texture)."
;
case
CUDNN_STATUS_EXECUTION_FAILED
:
return
"A kernel failed to execute."
;
case
CUDNN_STATUS_INTERNAL_ERROR
:
return
"An internal cuDNN operation failed."
;
case
CUDNN_STATUS_NOT_SUPPORTED
:
return
"The combination of parameters is not currently supported."
;
default:
return
"Unknown error code."
;
}
#define CUDNN_VERSION -1
// Fallback version query: always reports -1, the same value given to the
// fallback CUDNN_VERSION macro defined just above.
// Declared with an explicit (void) parameter list so that it is a real
// prototype in C (an empty () list leaves the arguments unchecked there).
static inline int cudnnGetVersion(void)
{
  return -1;
}
#endif
// some macros to help support cudnn R1 while using R2 code.
#define cudnnCreateTensorDescriptor cudnnCreateTensor4dDescriptor
#define cudnnDestroyTensorDescriptor cudnnDestroyTensor4dDescriptor
#define cudnnSetFilter4dDescriptor cudnnSetFilterDescriptor
typedef
cudnnTensor4dDescriptor_t
cudnnTensorDescriptor_t
;
#include <assert.h>
// Compatibility wrapper that provides cudnnGetConvolution2dForwardOutputDim
// on top of cudnnGetOutputTensor4dDim.
//
// Only convDesc is forwarded (together with CUDNN_CONVOLUTION_FWD);
// inputTensorDesc and filterDesc are accepted but not used here.
// The n, c, h and w pointers are handed straight to the wrapped call and
// its status is returned unchanged.
static inline cudnnStatus_t
cudnnGetConvolution2dForwardOutputDim(
    const cudnnConvolutionDescriptor_t convDesc,
    const cudnnTensorDescriptor_t inputTensorDesc,
    const cudnnFilterDescriptor_t filterDesc,
    int *n, int *c, int *h, int *w)
{
  cudnnStatus_t status =
      cudnnGetOutputTensor4dDim(convDesc, CUDNN_CONVOLUTION_FWD, n, c, h, w);
  return status;
}
#if CUDNN_VERSION < 3000
// Here we define the R3 API in terms of functions in the R2 interface
// This is only for what we use
// Stand-ins for the algorithm / preference selector type names used by this
// compatibility section: each one is declared as a plain int alias.
typedef
int
cudnnConvolutionFwdAlgo_t
;
typedef
int
cudnnConvolutionFwdPreference_t
;
typedef
int
cudnnConvolutionBwdDataAlgo_t
;
// Integer values assigned to the selector constants in this section.
#define CUDNN_CONVOLUTION_FWD_NO_WORKSPACE 0
#define CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 0
#define CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 1
#define CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT 2
static
inline
cudnnStatus_t
cudnnGetConvolutionForwardAlgorithm
(
static
cudnnStatus_t
cudnnGetConvolutionBackwardDataWorkspaceSize
(
cudnnHandle_t
handle
,
const
cudnnTensorDescriptor_t
srcDesc
,
const
cudnnFilterDescriptor_t
filterDesc
,
const
cudnnTensorDescriptor_t
diffDesc
,
const
cudnnConvolutionDescriptor_t
convDesc
,
const
cudnnTensorDescriptor_t
destDesc
,
cudnnConvolutionFwdPreference_t
preference
,
size_t
memoryLimitInbytes
,
cudnnConvolutionFwdAlgo_t
*
algo
)
{
*
algo
=
0
;
return
CUDNN_STATUS_SUCCESS
;
}
static
inline
cudnnStatus_t
cudnnGetConvolutionForwardWorkspaceSize
(
cudnnHandle_t
handle
,
const
cudnnTensorDescriptor_t
srcDesc
,
const
cudnnFilterDescriptor_t
filterDesc
,
const
cudnnConvolutionDescriptor_t
convDesc
,
const
cudnnTensor4dDescriptor_t
destDesc
,
cudnnConvolutionFwdAlgo_t
algo
,
size_t
*
sizeInBytes
)
{
const
cudnnTensorDescriptor_t
gradDesc
,
cudnnConvolutionBwdDataAlgo_t
algo
,
size_t
*
sizeInBytes
)
{
*
sizeInBytes
=
0
;
return
CUDNN_STATUS_SUCCESS
;
}
static
inline
cudnnStatus_t
cudnnConvolutionForward_v2
(
static
cudnnStatus_t
cudnnConvolutionBackwardData_v3
(
cudnnHandle_t
handle
,
const
void
*
alpha
,
const
cudnnTensorDescriptor_t
srcDesc
,
const
void
*
srcData
,
const
cudnnFilterDescriptor_t
filterDesc
,
const
void
*
filterData
,
const
cudnnTensorDescriptor_t
diffDesc
,
const
void
*
diffData
,
const
cudnnConvolutionDescriptor_t
convDesc
,
cudnnConvolution
Fwd
Algo_t
algo
,
void
*
work
S
pace
,
size_t
work
S
paceSizeInBytes
,
cudnnConvolution
BwdData
Algo_t
algo
,
void
*
work
s
pace
,
size_t
work
s
paceSizeInBytes
,
const
void
*
beta
,
const
cudnnTensorDescriptor_t
destDesc
,
void
*
destData
)
{
assert
(
*
(
float
*
)
alpha
==
1
.
0
);
cudnnAccumulateResult_t
r
;
if
(
*
(
float
*
)
beta
==
0
.
0
)
{
r
=
CUDNN_RESULT_NO_ACCUMULATE
;
}
else
if
(
*
(
float
*
)
beta
==
1
.
0
)
{
r
=
CUDNN_RESULT_ACCUMULATE
;
}
else
{
assert
(
0
&&
"beta must be 0.0 or 1.0"
);
}
return
cudnnConvolutionForward
(
handle
,
srcDesc
,
srcData
,
filterDesc
,
filterData
,
convDesc
,
destDesc
,
destData
,
r
);
const
cudnnTensorDescriptor_t
gradDesc
,
void
*
gradData
)
{
return
cudnnConvolutionBackwardData
(
handle
,
alpha
,
filterDesc
,
filterData
,
diffDesc
,
diffData
,
convDesc
,
beta
,
gradDesc
,
gradData
);
}
#define cudnnConvolutionForward cudnnConvolutionForward_v2
static
inline
cudnnStatus_t
cudnnConvolutionBackwardFilter_v2
(
cudnnHandle_t
handle
,
const
void
*
alpha
,
const
cudnnTensorDescriptor_t
srcDesc
,
const
void
*
srcData
,
typedef
int
cudnnConvolutionBwdFilterAlgo_t
;
#define CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 0
#define CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 1
#define CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT 2
#define CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT 3
static
cudnnStatus_t
cudnnGetConvolutionBackwardFilterWorkspaceSize
(
cudnnHandle_t
handle
,
const
cudnnTensorDescriptor_t
filterDesc
,
const
cudnnTensorDescriptor_t
diffDesc
,
const
void
*
diffData
,
const
cudnnConvolutionDescriptor_t
convDesc
,
const
void
*
beta
,
const
cudnnFilterDescriptor_t
gradDesc
,
void
*
gradData
)
{
assert
(
*
(
float
*
)
alpha
==
1
.
0
);
cudnnAccumulateResult_t
r
;
if
(
*
(
float
*
)
beta
==
0
.
0
)
{
r
=
CUDNN_RESULT_NO_ACCUMULATE
;
}
else
if
(
*
(
float
*
)
beta
==
1
.
0
)
{
r
=
CUDNN_RESULT_ACCUMULATE
;
}
else
{
assert
(
0
&&
"beta must be 0.0 or 1.0"
);
}
return
cudnnConvolutionBackwardFilter
(
handle
,
srcDesc
,
srcData
,
diffDesc
,
diffData
,
convDesc
,
gradDesc
,
gradData
,
r
);
cudnnConvolutionBwdDataAlgo_t
algo
,
size_t
*
sizeInBytes
)
{
*
sizeInBytes
=
0
;
return
CUDNN_STATUS_SUCCESS
;
}
#define cudnnConvolutionBackwardFilter cudnnConvolutionBackwardFilter_v2
static
inline
cudnnStatus_t
cudnnConvolutionBackwardData_v2
(
cudnnHandle_t
handle
,
static
cudnnStatus_t
cudnnConvolutionBackwardFilter_v3
(
cudnnHandle_t
handle
,
const
void
*
alpha
,
const
cudnn
FilterDescriptor_t
filter
Desc
,
const
void
*
filter
Data
,
const
cudnn
TensorDescriptor_t
src
Desc
,
const
void
*
src
Data
,
const
cudnnTensorDescriptor_t
diffDesc
,
const
void
*
diffData
,
const
cudnnConvolutionDescriptor_t
convDesc
,
cudnnConvolutionBwdFilterAlgo_t
algo
,
void
*
workspace
,
size_t
workspaceSizeInBytes
,
const
void
*
beta
,
const
cudnn
Tenso
rDescriptor_t
gradDesc
,
const
cudnn
Filte
rDescriptor_t
gradDesc
,
void
*
gradData
)
{
assert
(
*
(
float
*
)
alpha
==
1
.
0
);
cudnnAccumulateResult_t
r
;
if
(
*
(
float
*
)
beta
==
0
.
0
)
{
r
=
CUDNN_RESULT_NO_ACCUMULATE
;
}
else
if
(
*
(
float
*
)
beta
==
1
.
0
)
{
r
=
CUDNN_RESULT_ACCUMULATE
;
}
else
{
assert
(
0
&&
"beta must be 0.0 or 1.0"
);
}
/* This function needs the casting because its params are not
declared as const */
return
cudnnConvolutionBackwardData
(
handle
,
(
cudnnFilterDescriptor_t
)
filterDesc
,
filterData
,
(
cudnnTensorDescriptor_t
)
diffDesc
,
diffData
,
(
cudnnConvolutionDescriptor_t
)
convDesc
,
(
cudnnTensorDescriptor_t
)
gradDesc
,
gradData
,
r
);
return
cudnnConvolutionBackwardFilter
(
handle
,
alpha
,
srcDesc
,
srcData
,
diffDesc
,
diffData
,
convDesc
,
beta
,
gradDesc
,
gradData
);
}
#define cudnnConvolutionBackwardData cudnnConvolutionBackwardData_v2
//Needed for R2 rc2
# define CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING CUDNN_POOLING_AVERAGE
#else
// r2 rc1 and rc2 do not have the same macro defined
// I did not check whether this is the right combination, but as we do not wrap the padding interface, it is fine for now.
# define CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING ((cudnnPoolingMode_t)1)
#endif
#endif
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论