Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
f3900a58
提交
f3900a58
authored
6月 20, 2017
作者:
affanv14
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
modify cudnn implementation to support grouped comvolutions
上级
19f14deb
隐藏空白字符变更
内嵌
并排
正在显示
5 个修改的文件
包含
122 行增加
和
50 行删除
+122
-50
dnn.py
theano/gpuarray/dnn.py
+24
-15
dnn_base.c
theano/gpuarray/dnn_base.c
+46
-2
dnn_fwd.c
theano/gpuarray/dnn_fwd.c
+12
-7
dnn_gi.c
theano/gpuarray/dnn_gi.c
+19
-13
dnn_gw.c
theano/gpuarray/dnn_gw.c
+21
-13
没有找到文件。
theano/gpuarray/dnn.py
浏览文件 @
f3900a58
...
@@ -508,9 +508,10 @@ class GpuDnnConv(DnnBase):
...
@@ -508,9 +508,10 @@ class GpuDnnConv(DnnBase):
params_type
=
ParamsType
(
conv_algo
=
cudnn
.
cudnnConvolutionFwdAlgo_t
,
params_type
=
ParamsType
(
conv_algo
=
cudnn
.
cudnnConvolutionFwdAlgo_t
,
choose_algo
=
bool_t
,
choose_once
=
bool_t
,
choose_time
=
bool_t
,
choose_algo
=
bool_t
,
choose_once
=
bool_t
,
choose_time
=
bool_t
,
inplace
=
bool_t
,
inplace
=
bool_t
,
handle
=
handle_type
)
handle
=
handle_type
,
num_groups
=
int_t
)
def
__init__
(
self
,
algo
=
None
,
inplace
=
False
):
def
__init__
(
self
,
algo
=
None
,
inplace
=
False
,
num_groups
=
1
):
DnnBase
.
__init__
(
self
,
[
"dnn_conv_base.c"
,
"dnn_fwd.c"
],
DnnBase
.
__init__
(
self
,
[
"dnn_conv_base.c"
,
"dnn_fwd.c"
],
"APPLY_SPECIFIC(conv_fwd)"
)
"APPLY_SPECIFIC(conv_fwd)"
)
...
@@ -530,6 +531,7 @@ class GpuDnnConv(DnnBase):
...
@@ -530,6 +531,7 @@ class GpuDnnConv(DnnBase):
self
.
choose_algo
=
self
.
algo
in
SUPPORTED_DNN_CONV_ALGO_RUNTIME
self
.
choose_algo
=
self
.
algo
in
SUPPORTED_DNN_CONV_ALGO_RUNTIME
self
.
choose_once
=
self
.
algo
in
DNN_CONV_ALGO_CHOOSE_ONCE
self
.
choose_once
=
self
.
algo
in
DNN_CONV_ALGO_CHOOSE_ONCE
self
.
choose_time
=
self
.
algo
in
DNN_CONV_ALGO_CHOOSE_TIME
self
.
choose_time
=
self
.
algo
in
DNN_CONV_ALGO_CHOOSE_TIME
self
.
num_groups
=
num_groups
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
self
.
__dict__
.
update
(
d
)
...
@@ -642,9 +644,10 @@ class GpuDnnConvGradW(DnnBase):
...
@@ -642,9 +644,10 @@ class GpuDnnConvGradW(DnnBase):
params_type
=
ParamsType
(
conv_algo
=
cudnn
.
cudnnConvolutionBwdFilterAlgo_t
,
params_type
=
ParamsType
(
conv_algo
=
cudnn
.
cudnnConvolutionBwdFilterAlgo_t
,
choose_algo
=
bool_t
,
choose_once
=
bool_t
,
choose_time
=
bool_t
,
choose_algo
=
bool_t
,
choose_once
=
bool_t
,
choose_time
=
bool_t
,
inplace
=
bool_t
,
inplace
=
bool_t
,
handle
=
handle_type
)
handle
=
handle_type
,
num_groups
=
int_t
)
def
__init__
(
self
,
inplace
=
False
,
algo
=
None
):
def
__init__
(
self
,
inplace
=
False
,
algo
=
None
,
num_groups
=
1
):
DnnBase
.
__init__
(
self
,
[
"dnn_conv_base.c"
,
"dnn_gw.c"
],
DnnBase
.
__init__
(
self
,
[
"dnn_conv_base.c"
,
"dnn_gw.c"
],
"APPLY_SPECIFIC(conv_gw)"
)
"APPLY_SPECIFIC(conv_gw)"
)
self
.
inplace
=
bool
(
inplace
)
self
.
inplace
=
bool
(
inplace
)
...
@@ -662,6 +665,7 @@ class GpuDnnConvGradW(DnnBase):
...
@@ -662,6 +665,7 @@ class GpuDnnConvGradW(DnnBase):
self
.
choose_algo
=
self
.
algo
in
SUPPORTED_DNN_CONV_ALGO_RUNTIME
self
.
choose_algo
=
self
.
algo
in
SUPPORTED_DNN_CONV_ALGO_RUNTIME
self
.
choose_once
=
self
.
algo
in
DNN_CONV_ALGO_CHOOSE_ONCE
self
.
choose_once
=
self
.
algo
in
DNN_CONV_ALGO_CHOOSE_ONCE
self
.
choose_time
=
self
.
algo
in
DNN_CONV_ALGO_CHOOSE_TIME
self
.
choose_time
=
self
.
algo
in
DNN_CONV_ALGO_CHOOSE_TIME
self
.
num_groups
=
num_groups
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
self
.
__dict__
.
update
(
d
)
...
@@ -771,9 +775,10 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -771,9 +775,10 @@ class GpuDnnConvGradI(DnnBase):
params_type
=
ParamsType
(
conv_algo
=
cudnn
.
cudnnConvolutionBwdDataAlgo_t
,
params_type
=
ParamsType
(
conv_algo
=
cudnn
.
cudnnConvolutionBwdDataAlgo_t
,
choose_algo
=
bool_t
,
choose_once
=
bool_t
,
choose_time
=
bool_t
,
choose_algo
=
bool_t
,
choose_once
=
bool_t
,
choose_time
=
bool_t
,
inplace
=
bool_t
,
inplace
=
bool_t
,
handle
=
handle_type
)
handle
=
handle_type
,
num_groups
=
int_t
)
def
__init__
(
self
,
inplace
=
False
,
algo
=
None
):
def
__init__
(
self
,
inplace
=
False
,
algo
=
None
,
num_groups
=
1
):
DnnBase
.
__init__
(
self
,
[
"dnn_conv_base.c"
,
"dnn_gi.c"
],
DnnBase
.
__init__
(
self
,
[
"dnn_conv_base.c"
,
"dnn_gi.c"
],
"APPLY_SPECIFIC(conv_gi)"
)
"APPLY_SPECIFIC(conv_gi)"
)
self
.
inplace
=
bool
(
inplace
)
self
.
inplace
=
bool
(
inplace
)
...
@@ -791,6 +796,7 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -791,6 +796,7 @@ class GpuDnnConvGradI(DnnBase):
self
.
choose_algo
=
self
.
algo
in
SUPPORTED_DNN_CONV_ALGO_RUNTIME
self
.
choose_algo
=
self
.
algo
in
SUPPORTED_DNN_CONV_ALGO_RUNTIME
self
.
choose_once
=
self
.
algo
in
DNN_CONV_ALGO_CHOOSE_ONCE
self
.
choose_once
=
self
.
algo
in
DNN_CONV_ALGO_CHOOSE_ONCE
self
.
choose_time
=
self
.
algo
in
DNN_CONV_ALGO_CHOOSE_TIME
self
.
choose_time
=
self
.
algo
in
DNN_CONV_ALGO_CHOOSE_TIME
self
.
num_groups
=
num_groups
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
self
.
__dict__
.
update
(
d
)
...
@@ -854,7 +860,7 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -854,7 +860,7 @@ class GpuDnnConvGradI(DnnBase):
def
dnn_conv
(
img
,
kerns
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
dilation
=
(
1
,
1
),
def
dnn_conv
(
img
,
kerns
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
dilation
=
(
1
,
1
),
conv_mode
=
'conv'
,
direction_hint
=
None
,
workmem
=
None
,
num_groups
=
1
,
conv_mode
=
'conv'
,
direction_hint
=
None
,
workmem
=
None
,
algo
=
None
,
precision
=
None
):
algo
=
None
,
precision
=
None
):
"""
"""
GPU convolution using cuDNN from NVIDIA.
GPU convolution using cuDNN from NVIDIA.
...
@@ -973,7 +979,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
...
@@ -973,7 +979,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),
filter_dilation
=
dilation
)
filter_dilation
=
dilation
)
out_shp
=
assert_conv_shape
(
out_shp
)
out_shp
=
assert_conv_shape
(
out_shp
)
out
=
GpuAllocEmpty
(
dtype
=
img
.
dtype
,
context_name
=
ctx_name
)(
*
out_shp
)
out
=
GpuAllocEmpty
(
dtype
=
img
.
dtype
,
context_name
=
ctx_name
)(
*
out_shp
)
return
GpuDnnConv
(
algo
=
algo
)(
img
,
kerns
,
out
,
desc
)
return
GpuDnnConv
(
algo
=
algo
,
num_groups
=
num_groups
)(
img
,
kerns
,
out
,
desc
)
def
dnn_conv3d
(
img
,
kerns
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
,
1
),
dilation
=
(
1
,
1
,
1
),
def
dnn_conv3d
(
img
,
kerns
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
,
1
),
dilation
=
(
1
,
1
,
1
),
...
@@ -1097,7 +1103,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1
...
@@ -1097,7 +1103,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1
def
dnn_gradweight
(
img
,
topgrad
,
kerns_shp
,
border_mode
=
'valid'
,
def
dnn_gradweight
(
img
,
topgrad
,
kerns_shp
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
dilation
=
(
1
,
1
),
conv_mode
=
'conv'
,
precision
=
None
):
subsample
=
(
1
,
1
),
dilation
=
(
1
,
1
),
num_groups
=
1
,
conv_mode
=
'conv'
,
precision
=
None
):
"""
"""
TODO: document this
TODO: document this
"""
"""
...
@@ -1112,7 +1118,7 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
...
@@ -1112,7 +1118,7 @@ def dnn_gradweight(img, topgrad, kerns_shp, border_mode='valid',
desc
=
GpuDnnConvDesc
(
border_mode
=
border_mode
,
subsample
=
subsample
,
dilation
=
dilation
,
desc
=
GpuDnnConvDesc
(
border_mode
=
border_mode
,
subsample
=
subsample
,
dilation
=
dilation
,
conv_mode
=
conv_mode
,
precision
=
precision
)(
kerns_shp
)
conv_mode
=
conv_mode
,
precision
=
precision
)(
kerns_shp
)
out
=
GpuAllocEmpty
(
dtype
=
img
.
dtype
,
context_name
=
ctx_name
)(
*
kerns_shp
)
out
=
GpuAllocEmpty
(
dtype
=
img
.
dtype
,
context_name
=
ctx_name
)(
*
kerns_shp
)
return
GpuDnnConvGradW
()(
img
,
topgrad
,
out
,
desc
)
return
GpuDnnConvGradW
(
num_groups
=
num_groups
)(
img
,
topgrad
,
out
,
desc
)
def
dnn_gradweight3d
(
img
,
topgrad
,
kerns_shp
,
border_mode
=
'valid'
,
def
dnn_gradweight3d
(
img
,
topgrad
,
kerns_shp
,
border_mode
=
'valid'
,
...
@@ -1125,7 +1131,7 @@ def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid',
...
@@ -1125,7 +1131,7 @@ def dnn_gradweight3d(img, topgrad, kerns_shp, border_mode='valid',
def
dnn_gradinput
(
kerns
,
topgrad
,
img_shp
,
border_mode
=
'valid'
,
def
dnn_gradinput
(
kerns
,
topgrad
,
img_shp
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
dilation
=
(
1
,
1
),
conv_mode
=
'conv'
,
precision
=
None
):
subsample
=
(
1
,
1
),
dilation
=
(
1
,
1
),
num_groups
=
1
,
conv_mode
=
'conv'
,
precision
=
None
):
"""
"""
TODO: document this
TODO: document this
"""
"""
...
@@ -1140,7 +1146,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
...
@@ -1140,7 +1146,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
desc
=
GpuDnnConvDesc
(
border_mode
=
border_mode
,
subsample
=
subsample
,
dilation
=
dilation
,
desc
=
GpuDnnConvDesc
(
border_mode
=
border_mode
,
subsample
=
subsample
,
dilation
=
dilation
,
conv_mode
=
conv_mode
,
precision
=
precision
)(
kerns
.
shape
)
conv_mode
=
conv_mode
,
precision
=
precision
)(
kerns
.
shape
)
out
=
GpuAllocEmpty
(
dtype
=
kerns
.
dtype
,
context_name
=
ctx_name
)(
*
img_shp
)
out
=
GpuAllocEmpty
(
dtype
=
kerns
.
dtype
,
context_name
=
ctx_name
)(
*
img_shp
)
return
GpuDnnConvGradI
()(
kerns
,
topgrad
,
out
,
desc
)
return
GpuDnnConvGradI
(
num_groups
=
num_groups
)(
kerns
,
topgrad
,
out
,
desc
)
def
dnn_gradinput3d
(
kerns
,
topgrad
,
img_shp
,
border_mode
=
'valid'
,
def
dnn_gradinput3d
(
kerns
,
topgrad
,
img_shp
,
border_mode
=
'valid'
,
...
@@ -2665,6 +2671,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
...
@@ -2665,6 +2671,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
border_mode
=
op
.
border_mode
,
border_mode
=
op
.
border_mode
,
subsample
=
op
.
subsample
,
subsample
=
op
.
subsample
,
dilation
=
op
.
filter_dilation
,
dilation
=
op
.
filter_dilation
,
num_groups
=
op
.
num_groups
,
direction_hint
=
'forward!'
,
direction_hint
=
'forward!'
,
conv_mode
=
conv_mode
)
conv_mode
=
conv_mode
)
elif
isinstance
(
op
,
AbstractConv2d_gradWeights
):
elif
isinstance
(
op
,
AbstractConv2d_gradWeights
):
...
@@ -2674,6 +2681,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
...
@@ -2674,6 +2681,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
border_mode
=
op
.
border_mode
,
border_mode
=
op
.
border_mode
,
subsample
=
op
.
subsample
,
subsample
=
op
.
subsample
,
dilation
=
op
.
filter_dilation
,
dilation
=
op
.
filter_dilation
,
num_groups
=
op
.
num_groups
,
conv_mode
=
conv_mode
)
conv_mode
=
conv_mode
)
elif
isinstance
(
op
,
AbstractConv2d_gradInputs
):
elif
isinstance
(
op
,
AbstractConv2d_gradInputs
):
shape
=
(
inp2
.
shape
[
0
],
inp1
.
shape
[
1
],
shape
=
(
inp2
.
shape
[
0
],
inp1
.
shape
[
1
],
...
@@ -2682,6 +2690,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
...
@@ -2682,6 +2690,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
border_mode
=
op
.
border_mode
,
border_mode
=
op
.
border_mode
,
subsample
=
op
.
subsample
,
subsample
=
op
.
subsample
,
dilation
=
op
.
filter_dilation
,
dilation
=
op
.
filter_dilation
,
num_groups
=
op
.
num_groups
,
conv_mode
=
conv_mode
)
conv_mode
=
conv_mode
)
return
[
rval
]
return
[
rval
]
...
@@ -2767,17 +2776,17 @@ def local_abstractconv_gi_cudnn(node):
...
@@ -2767,17 +2776,17 @@ def local_abstractconv_gi_cudnn(node):
@inplace_allocempty
(
GpuDnnConv
,
2
)
@inplace_allocempty
(
GpuDnnConv
,
2
)
def
local_dnn_conv_inplace
(
node
,
inputs
):
def
local_dnn_conv_inplace
(
node
,
inputs
):
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
,
inplace
=
True
)(
*
inputs
)]
return
[
GpuDnnConv
(
algo
=
node
.
op
.
algo
,
inplace
=
True
,
num_groups
=
node
.
op
.
num_groups
)(
*
inputs
)]
@inplace_allocempty
(
GpuDnnConvGradW
,
2
)
@inplace_allocempty
(
GpuDnnConvGradW
,
2
)
def
local_dnn_convgw_inplace
(
node
,
inputs
):
def
local_dnn_convgw_inplace
(
node
,
inputs
):
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
,
inplace
=
True
)(
*
inputs
)]
return
[
GpuDnnConvGradW
(
algo
=
node
.
op
.
algo
,
inplace
=
True
,
num_groups
=
node
.
op
.
num_groups
)(
*
inputs
)]
@inplace_allocempty
(
GpuDnnConvGradI
,
2
)
@inplace_allocempty
(
GpuDnnConvGradI
,
2
)
def
local_dnn_convgi_inplace
(
node
,
inputs
):
def
local_dnn_convgi_inplace
(
node
,
inputs
):
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
,
inplace
=
True
)(
*
inputs
)]
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
,
inplace
=
True
,
num_groups
=
node
.
op
.
num_groups
)(
*
inputs
)]
optdb
.
register
(
'local_dnna_conv_inplace'
,
optdb
.
register
(
'local_dnna_conv_inplace'
,
tensor
.
opt
.
in2out
(
local_dnn_conv_inplace
,
tensor
.
opt
.
in2out
(
local_dnn_conv_inplace
,
...
...
theano/gpuarray/dnn_base.c
浏览文件 @
f3900a58
...
@@ -54,6 +54,49 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
...
@@ -54,6 +54,49 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
return
0
;
return
0
;
}
}
static
int
c_set_tensor_for_conv
(
PyGpuArrayObject
*
var
,
cudnnTensorDescriptor_t
desc
,
int
groups
)
{
cudnnDataType_t
dt
;
size_t
ds
;
switch
(
var
->
ga
.
typecode
)
{
case
GA_FLOAT
:
dt
=
CUDNN_DATA_FLOAT
;
break
;
case
GA_DOUBLE
:
dt
=
CUDNN_DATA_DOUBLE
;
break
;
case
GA_HALF
:
dt
=
CUDNN_DATA_HALF
;
break
;
default:
PyErr_SetString
(
PyExc_TypeError
,
"Non-float datatype in c_set_tensorNd"
);
return
-
1
;
}
ds
=
gpuarray_get_elsize
(
var
->
ga
.
typecode
);
int
strs
[
8
],
dims
[
8
],
default_stride
=
1
;
unsigned
int
nd
=
PyGpuArray_NDIM
(
var
);
for
(
unsigned
int
_i
=
nd
;
_i
>
0
;
_i
--
)
{
unsigned
int
i
=
_i
-
1
;
strs
[
i
]
=
(
PyGpuArray_DIM
(
var
,
i
)
!=
1
&&
PyGpuArray_STRIDE
(
var
,
i
))
?
PyGpuArray_STRIDE
(
var
,
i
)
/
ds
:
default_stride
;
default_stride
*=
PyGpuArray_DIM
(
var
,
i
);
dims
[
i
]
=
PyGpuArray_DIM
(
var
,
i
);
}
dims
[
1
]
=
dims
[
1
]
/
groups
;
cudnnStatus_t
err
=
cudnnSetTensorNdDescriptor
(
desc
,
dt
,
nd
,
dims
,
strs
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Could not set tensorNd descriptor: %s"
,
cudnnGetErrorString
(
err
));
return
-
1
;
}
return
0
;
}
static
int
c_make_tensorNd
(
PyGpuArrayObject
*
var
,
cudnnTensorDescriptor_t
*
desc
)
{
static
int
c_make_tensorNd
(
PyGpuArrayObject
*
var
,
cudnnTensorDescriptor_t
*
desc
)
{
cudnnStatus_t
err
;
cudnnStatus_t
err
;
err
=
cudnnCreateTensorDescriptor
(
desc
);
err
=
cudnnCreateTensorDescriptor
(
desc
);
...
@@ -71,7 +114,7 @@ static int c_make_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t *desc)
...
@@ -71,7 +114,7 @@ static int c_make_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t *desc)
}
}
static
int
static
int
c_set_filter
(
PyGpuArrayObject
*
var
,
cudnnFilterDescriptor_t
desc
)
{
c_set_filter
(
PyGpuArrayObject
*
var
,
cudnnFilterDescriptor_t
desc
,
int
groups
)
{
cudnnDataType_t
dt
;
cudnnDataType_t
dt
;
cudnnStatus_t
err
;
cudnnStatus_t
err
;
...
@@ -111,6 +154,7 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
...
@@ -111,6 +154,7 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
/* Filters can't be less than 3d so we pad */
/* Filters can't be less than 3d so we pad */
for
(
unsigned
int
i
=
nd
;
i
<
3
;
i
++
)
for
(
unsigned
int
i
=
nd
;
i
<
3
;
i
++
)
dims
[
i
]
=
1
;
dims
[
i
]
=
1
;
dims
[
0
]
/=
groups
;
if
(
nd
<
3
)
if
(
nd
<
3
)
nd
=
3
;
nd
=
3
;
...
@@ -135,7 +179,7 @@ static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) {
...
@@ -135,7 +179,7 @@ static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) {
cudnnGetErrorString
(
err
));
cudnnGetErrorString
(
err
));
return
-
1
;
return
-
1
;
}
}
if
(
c_set_filter
(
var
,
*
desc
)
!=
0
)
{
if
(
c_set_filter
(
var
,
*
desc
,
1
)
!=
0
)
{
cudnnDestroyFilterDescriptor
(
*
desc
);
cudnnDestroyFilterDescriptor
(
*
desc
);
return
-
1
;
return
-
1
;
}
}
...
...
theano/gpuarray/dnn_fwd.c
浏览文件 @
f3900a58
...
@@ -29,7 +29,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -29,7 +29,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
float
af
=
alpha
,
bf
=
beta
;
float
af
=
alpha
,
bf
=
beta
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
])
{
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
]
*
params
->
num_groups
)
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"images and kernel must have the same stack size"
);
"images and kernel must have the same stack size"
);
return
1
;
return
1
;
...
@@ -72,12 +72,15 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -72,12 +72,15 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return
0
;
return
0
;
}
}
if
(
c_set_tensor
Nd
(
input
,
APPLY_SPECIFIC
(
input
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
input
,
APPLY_SPECIFIC
(
input
),
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
)
,
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
if
(
c_set_tensor
Nd
(
*
output
,
APPLY_SPECIFIC
(
output
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
*
output
,
APPLY_SPECIFIC
(
output
),
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
size_t
input_offset
=
PyGpuArray_STRIDE
(
input
,
0
)
/
params
->
num_groups
;
size_t
kern_offset
=
PyGpuArray_STRIDE
(
kerns
,
0
)
*
PyGpuArray_DIM
(
kerns
,
0
)
/
params
->
num_groups
;
size_t
output_offset
=
PyGpuArray_STRIDE
(
*
output
,
0
)
/
params
->
num_groups
;
cudnnConvolutionFwdAlgo_t
algo
=
params
->
conv_algo
;
cudnnConvolutionFwdAlgo_t
algo
=
params
->
conv_algo
;
...
@@ -264,15 +267,17 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -264,15 +267,17 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
cuda_wait
(
kerns
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
kerns
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
((
*
output
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
output
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
for
(
int
g
=
0
;
g
<
params
->
num_groups
;
g
++
)
{
err
=
cudnnConvolutionForward
(
err
=
cudnnConvolutionForward
(
params
->
handle
,
params
->
handle
,
alpha_p
,
alpha_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
)
+
input_offset
*
g
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
)
+
kern_offset
*
g
,
desc
,
algo
,
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
beta_p
,
beta_p
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
*
output
));
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
*
output
)
+
output_offset
*
g
);
}
if
(
worksize
!=
0
)
if
(
worksize
!=
0
)
gpudata_release
(
workspace
);
gpudata_release
(
workspace
);
...
...
theano/gpuarray/dnn_gi.c
浏览文件 @
f3900a58
...
@@ -32,7 +32,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -32,7 +32,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
float
af
=
alpha
,
bf
=
beta
;
float
af
=
alpha
,
bf
=
beta
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
PyGpuArray_DIMS
(
im
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
])
{
if
(
PyGpuArray_DIMS
(
im
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
]
*
params
->
num_groups
)
{
PyErr_SetString
(
PyExc_ValueError
,
"images and kernel must have the same "
PyErr_SetString
(
PyExc_ValueError
,
"images and kernel must have the same "
"stack size"
);
"stack size"
);
return
1
;
return
1
;
...
@@ -78,12 +78,15 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -78,12 +78,15 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
return
0
;
return
0
;
}
}
if
(
c_set_tensor
Nd
(
output
,
APPLY_SPECIFIC
(
output
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
output
,
APPLY_SPECIFIC
(
output
),
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
)
,
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
if
(
c_set_tensor
Nd
(
*
input
,
APPLY_SPECIFIC
(
input
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
*
input
,
APPLY_SPECIFIC
(
input
),
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
size_t
input_offset
=
PyGpuArray_STRIDE
(
*
input
,
0
)
/
params
->
num_groups
;
size_t
kern_offset
=
PyGpuArray_STRIDE
(
kerns
,
0
)
*
PyGpuArray_DIM
(
kerns
,
0
)
/
params
->
num_groups
;
size_t
output_offset
=
PyGpuArray_STRIDE
(
output
,
0
)
/
params
->
num_groups
;
cudnnConvolutionBwdDataAlgo_t
algo
=
params
->
conv_algo
;
cudnnConvolutionBwdDataAlgo_t
algo
=
params
->
conv_algo
;
...
@@ -100,7 +103,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -100,7 +103,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
}
}
if
(
PyGpuArray_NDIM
(
im
)
==
4
)
{
if
(
PyGpuArray_NDIM
(
im
)
==
4
)
{
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
/
params
->
num_groups
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ld"
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ld"
...
@@ -294,14 +297,17 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -294,14 +297,17 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
((
*
input
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
input
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnConvolutionBackwardData
(
for
(
int
g
=
0
;
g
<
params
->
num_groups
;
g
++
)
params
->
handle
,
{
alpha_p
,
err
=
cudnnConvolutionBackwardData
(
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
params
->
handle
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
alpha_p
,
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
)
+
kern_offset
*
g
,
beta_p
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
)
+
output_offset
*
g
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
*
input
));
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
beta_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
*
input
)
+
input_offset
*
g
);
}
if
(
worksize
!=
0
)
if
(
worksize
!=
0
)
gpudata_release
(
workspace
);
gpudata_release
(
workspace
);
...
...
theano/gpuarray/dnn_gw.c
浏览文件 @
f3900a58
...
@@ -28,7 +28,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -28,7 +28,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
float
af
=
alpha
,
bf
=
beta
;
float
af
=
alpha
,
bf
=
beta
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
km
)[
1
])
{
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
km
)[
1
]
*
params
->
num_groups
)
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"GpuDnnConv images and kernel must have the same stack size"
);
"GpuDnnConv images and kernel must have the same stack size"
);
return
1
;
return
1
;
...
@@ -71,13 +71,17 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -71,13 +71,17 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
return
0
;
return
0
;
}
}
if
(
c_set_tensor
Nd
(
input
,
APPLY_SPECIFIC
(
input
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
input
,
APPLY_SPECIFIC
(
input
),
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
if
(
c_set_tensor
Nd
(
output
,
APPLY_SPECIFIC
(
output
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
output
,
APPLY_SPECIFIC
(
output
),
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
if
(
c_set_filter
(
*
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
(
*
kerns
,
APPLY_SPECIFIC
(
kerns
)
,
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
size_t
input_offset
=
PyGpuArray_STRIDE
(
input
,
0
)
/
params
->
num_groups
;
size_t
kern_offset
=
PyGpuArray_STRIDE
(
*
kerns
,
0
)
*
PyGpuArray_DIM
(
*
kerns
,
0
)
/
params
->
num_groups
;
size_t
output_offset
=
PyGpuArray_STRIDE
(
output
,
0
)
/
params
->
num_groups
;
cudnnConvolutionBwdFilterAlgo_t
algo
=
params
->
conv_algo
;
cudnnConvolutionBwdFilterAlgo_t
algo
=
params
->
conv_algo
;
cuda_enter
(
c
->
ctx
);
cuda_enter
(
c
->
ctx
);
...
@@ -93,7 +97,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -93,7 +97,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
}
}
if
(
PyGpuArray_NDIM
(
input
)
==
4
)
{
if
(
PyGpuArray_NDIM
(
input
)
==
4
)
{
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
/
params
->
num_groups
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%dx%ld"
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%dx%ld"
...
@@ -264,14 +268,18 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -264,14 +268,18 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
((
*
kerns
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
kerns
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnConvolutionBackwardFilter
(
for
(
int
g
=
0
;
g
<
params
->
num_groups
;
g
++
)
params
->
handle
,
{
alpha_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
err
=
cudnnConvolutionBackwardFilter
(
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
params
->
handle
,
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
alpha_p
,
beta_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
)
+
input_offset
*
g
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
*
kerns
));
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
)
+
output_offset
*
g
,
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
beta_p
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
*
kerns
)
+
kern_offset
*
g
);
}
if
(
worksize
!=
0
)
if
(
worksize
!=
0
)
gpudata_release
(
workspace
);
gpudata_release
(
workspace
);
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论