Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
740295e2
提交
740295e2
authored
9月 16, 2014
作者:
Arnaud Bergeron
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Make the core code for the convolution reusable.
上级
c022347b
显示空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
150 行增加
和
107 行删除
+150
-107
dnn.py
theano/sandbox/cuda/dnn.py
+148
-105
test_conv_cuda_ndarray.py
theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
+2
-2
没有找到文件。
theano/sandbox/cuda/dnn.py
浏览文件 @
740295e2
...
@@ -10,12 +10,22 @@ from theano.sandbox.cuda import GpuOp
...
@@ -10,12 +10,22 @@ from theano.sandbox.cuda import GpuOp
from
theano.sandbox.cuda.basic_ops
import
(
as_cuda_ndarray_variable
,
from
theano.sandbox.cuda.basic_ops
import
(
as_cuda_ndarray_variable
,
gpu_contiguous
)
gpu_contiguous
)
from
theano.sandbox.cuda.blas
import
GpuConv
from
theano.sandbox.cuda.blas
import
GpuConv
from
theano.compat
import
PY3
class
GpuDnnConv
(
GpuOp
):
__props__
=
(
'border_mode'
,)
def
__init__
(
self
,
border_mode
):
class
GpuDnnConvBase
(
GpuOp
):
__props__
=
(
'border_mode'
,
'conv_mode'
)
def
__init__
(
self
,
border_mode
,
conv_mode
=
'conv'
):
assert
border_mode
in
(
'valid'
,
'full'
)
self
.
border_mode
=
border_mode
self
.
border_mode
=
border_mode
assert
conv_mode
in
(
'conv'
,
'cross'
)
self
.
conv_mode
=
conv_mode
def
__setstate__
(
self
,
props
):
self
.
__dict__
.
update
(
props
)
if
not
hasattr
(
self
,
'conv_mode'
):
self
.
conv_mode
=
'conv'
def
make_node
(
self
,
img
,
kern
):
def
make_node
(
self
,
img
,
kern
):
if
img
.
type
.
ndim
!=
4
:
if
img
.
type
.
ndim
!=
4
:
...
@@ -38,61 +48,106 @@ class GpuDnnConv(GpuOp):
...
@@ -38,61 +48,106 @@ class GpuDnnConv(GpuOp):
def
c_libraries
(
self
):
def
c_libraries
(
self
):
return
[
'cudnn'
]
return
[
'cudnn'
]
def
c_support_code
_struct
(
self
,
node
,
struct_id
):
def
c_support_code
(
self
):
return
"""
return
"""
cudnnHandle_t handle
%(id)
d;
cudnnHandle_t _handle = NULL;
cudnnTensor4dDescriptor_t input
%(id)
d;
"""
cudnnTensor4dDescriptor_t output
%(id)
d;
cudnnFilterDescriptor_t kerns
%(id)
d;
cudnnConvolutionDescriptor_t op
%(id)
d;
"""
%
dict
(
id
=
struct_id
)
def
c_init_code_struct
(
self
,
node
,
struct_id
,
sub
):
def
c_init_code
(
self
):
return
"""
if
PY3
:
handle
%(id)
d = NULL;
error_out
=
"NULL"
input
%(id)
d = NULL;
else
:
output
%(id)
d = NULL;
error_out
=
""
kerns
%(id)
d = NULL;
return
[
"""{
op
%(id)
d = NULL;
cudnnStatus_t err;
cudnnStatus_t err
%(id)
d;
if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
if ((err
%(id)
d = cudnnCreate(&handle
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not create cudnn handle:
%%
s",
PyErr_Format(PyExc_RuntimeError, "could not create cudnn handle:
%%
s",
cudnnGetErrorString(err
%(id)
d
));
cudnnGetErrorString(err));
%(fail)
s
return
%
s;
}
}
if ((err
%(id)
d = cudnnCreateTensor4dDescriptor(&input
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
}"""
%
(
error_out
,)]
def
c_support_code_struct
(
self
,
node
,
struct_id
):
types
=
[
'cudnn'
+
d
.
capitalize
()
+
'Descriptor_t'
for
d
in
self
.
descriptors
]
elems
=
[
t
+
' param
%
d_
%
d;'
%
(
i
,
struct_id
)
for
i
,
t
in
enumerate
(
types
)]
return
(
"cudnnConvolutionDescriptor_t op
%
d;
\n
"
%
(
struct_id
,)
+
'
\n
'
.
join
(
elems
))
def
c_init_code_struct
(
self
,
node
,
struct_id
,
sub
):
vnames
=
[
'param
%
d_
%
d'
%
(
i
,
struct_id
)
for
i
,
t
in
enumerate
(
self
.
descriptors
)]
inits
=
[
vname
+
'= NULL;'
for
vname
in
vnames
]
creates
=
[]
for
d
,
var
in
zip
(
self
.
descriptors
,
vnames
):
creates
.
append
(
"""
if ((err
%(id)
d = cudnnCreate
%(d)
sDescriptor(&
%(var)
s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
"(inp):
%%
s", cudnnGetErrorString(err
%(id)
d));
"(inp):
%%
s", cudnnGetErrorString(err
%(id)
d));
%(fail)
s
%(fail)
s
}
}
if ((err
%(id)
d = cudnnCreateTensor4dDescriptor(&output
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
"""
%
dict
(
id
=
struct_id
,
d
=
d
.
capitalize
(),
var
=
var
,
fail
=
sub
[
'fail'
]))
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
"(out):
%%
s", cudnnGetErrorString(err
%(id)
d));
return
"""
%(fail)
s
%(init)
s
}
cudnnStatus_t err
%(id)
d;
if ((err
%(id)
d = cudnnCreateFilterDescriptor(&kerns
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
%(create)
s
PyErr_Format(PyExc_MemoryError, "could not allocate filter descriptor:
%%
s",
cudnnGetErrorString(err
%(id)
d));
%(fail)
s
}
if ((err
%(id)
d = cudnnCreateConvolutionDescriptor(&op
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
if ((err
%(id)
d = cudnnCreateConvolutionDescriptor(&op
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate convolution "
PyErr_Format(PyExc_MemoryError, "could not allocate convolution "
"descriptor:
%%
s", cudnnGetErrorString(err
%(id)
d));
"descriptor:
%%
s", cudnnGetErrorString(err
%(id)
d));
%(fail)
s
%(fail)
s
}
}
"""
%
dict
(
id
=
struct_id
,
fail
=
sub
[
'fail'
])
"""
%
dict
(
id
=
struct_id
,
fail
=
sub
[
'fail'
],
init
=
'
\n
'
.
join
(
inits
),
create
=
'
\n
'
.
join
(
creates
))
def
c_cleanup_code_struct
(
self
,
node
,
struct_id
):
def
c_cleanup_code_struct
(
self
,
node
,
struct_id
):
cleanups
=
[
'cudnnDestroy
%
sDescriptor(param
%
d_
%
d);'
%
(
d
.
capitalize
(),
i
,
struct_id
)
for
i
,
d
in
enumerate
(
self
.
descriptors
)]
return
"""
return
"""
cudnnDestroyTensor4dDescriptor(input
%(id)
d);
%(cleanup)
s
cudnnDestroyTensor4dDescriptor(output
%(id)
d);
cudnnDestroyFilterDescriptor(kerns
%(id)
d);
cudnnDestroyConvolutionDescriptor(op
%(id)
d);
cudnnDestroyConvolutionDescriptor(op
%(id)
d);
cudnnDestroy(handle
%(id)
d);
"""
%
dict
(
id
=
struct_id
,
cleanup
=
'
\n
'
.
join
(
cleanups
))
"""
%
dict
(
id
=
struct_id
)
def
c_set_tensor4d
(
self
,
var
,
desc
,
err
,
fail
):
return
"""
%(err)
s = cudnnSetTensor4dDescriptorEx(
%(desc)
s, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(
%(var)
s)[0],
CudaNdarray_HOST_DIMS(
%(var)
s)[1],
CudaNdarray_HOST_DIMS(
%(var)
s)[2],
CudaNdarray_HOST_DIMS(
%(var)
s)[3],
CudaNdarray_HOST_STRIDES(
%(var)
s)[0],
CudaNdarray_HOST_STRIDES(
%(var)
s)[1],
CudaNdarray_HOST_STRIDES(
%(var)
s)[2],
CudaNdarray_HOST_STRIDES(
%(var)
s)[3]
);
if (
%(err)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set tensor4d descriptor:
%%
s",
cudnnGetErrorString(
%(err)
s));
%(fail)
s
}
"""
%
dict
(
var
=
var
,
err
=
err
,
desc
=
desc
,
fail
=
fail
)
def
c_set_filter
(
self
,
var
,
desc
,
err
,
fail
):
return
"""
%(err)
s = cudnnSetFilterDescriptor(
%(desc)
s, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(
%(var)
s)[0],
CudaNdarray_HOST_DIMS(
%(var)
s)[1],
CudaNdarray_HOST_DIMS(
%(var)
s)[2],
CudaNdarray_HOST_DIMS(
%(var)
s)[3]
);
if (
%(err)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set filter descriptor:
%%
s",
cudnnGetErrorString(
%(err)
s));
%(fail)
s
}
"""
%
dict
(
var
=
var
,
desc
=
desc
,
err
=
err
,
fail
=
fail
)
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
img
,
kern
=
inputs
param0
,
param1
=
inputs
out
,
=
outputs
out
,
=
outputs
if
self
.
border_mode
==
"valid"
:
if
self
.
border_mode
==
"valid"
:
...
@@ -101,65 +156,57 @@ cudnnDestroy(handle%(id)d);
...
@@ -101,65 +156,57 @@ cudnnDestroy(handle%(id)d);
assert
self
.
border_mode
==
"full"
assert
self
.
border_mode
==
"full"
bmode
=
0
bmode
=
0
if
self
.
conv_mode
==
'conv'
:
conv_flag
=
'CUDNN_CONVOLUTION'
else
:
conv_flag
=
'CUDNN_CROSS_CORRELATION'
vnames
=
[
'param
%
d_
%
d'
%
(
i
,
sub
[
'struct_id'
])
for
i
,
t
in
enumerate
(
self
.
descriptors
)]
checks
=
[]
for
v
in
(
param0
,
param1
):
checks
.
append
(
"""
if (!CudaNdarray_is_c_contiguous(
%
s)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
%
s
}
"""
%
(
v
,
sub
[
'fail'
]))
sets
=
[]
for
p
,
v
,
d
in
zip
((
param0
,
param1
),
vnames
[:
-
1
],
self
.
descriptors
[:
-
1
]):
sets
.
append
(
getattr
(
self
,
'c_set_'
+
d
)(
p
,
v
,
'err'
+
name
,
sub
[
'fail'
]))
set_out
=
getattr
(
self
,
'c_set_'
+
self
.
descriptors
[
-
1
])(
out
,
vnames
[
-
1
],
'err'
+
name
,
sub
[
'fail'
])
return
"""
return
"""
cudnnStatus_t err
%(name)
s;
cudnnStatus_t err
%(name)
s;
int pad_w
%(name)
s;
int pad_w
%(name)
s;
int pad_h
%(name)
s;
int pad_h
%(name)
s;
if (!CudaNdarray_is_c_contiguous(
%(img)
s)) {
%(checks)
s
PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
%(fail)
s
}
if (!CudaNdarray_is_c_contiguous(
%(kerns)
s)) {
%(sets)
s
PyErr_SetString(PyExc_ValueError, "Only contiguous filters are supported.");
%(fail)
s
}
err
%(name)
s = cudnnSetTensor4dDescriptorEx(
input
%(id)
d, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(
%(img)
s)[0],
CudaNdarray_HOST_DIMS(
%(img)
s)[1],
CudaNdarray_HOST_DIMS(
%(img)
s)[2],
CudaNdarray_HOST_DIMS(
%(img)
s)[3],
CudaNdarray_HOST_STRIDES(
%(img)
s)[0],
CudaNdarray_HOST_STRIDES(
%(img)
s)[1],
CudaNdarray_HOST_STRIDES(
%(img)
s)[2],
CudaNdarray_HOST_STRIDES(
%(img)
s)[3]
);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set tensor4d descriptor:
%%
s",
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
err
%(name)
s = cudnnSetFilterDescriptor(
kerns
%(id)
d, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(
%(kerns)
s)[0],
CudaNdarray_HOST_DIMS(
%(kerns)
s)[1],
CudaNdarray_HOST_DIMS(
%(kerns)
s)[2],
CudaNdarray_HOST_DIMS(
%(kerns)
s)[3]
);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set filter descriptor:
%%
s",
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
if (
%(bmode)
d == 1) {
if (
%(bmode)
d == 1) {
pad_h
%(name)
s = 0;
pad_h
%(name)
s = 0;
pad_w
%(name)
s = 0;
pad_w
%(name)
s = 0;
} else if (
%(bmode)
d == 0) {
} else if (
%(bmode)
d == 0) {
pad_h
%(name)
s = CudaNdarray_HOST_DIMS(
%(
kerns
)
s)[2] - 1;
pad_h
%(name)
s = CudaNdarray_HOST_DIMS(
%(
param1
)
s)[2] - 1;
pad_w
%(name)
s = CudaNdarray_HOST_DIMS(
%(
kerns
)
s)[3] - 1;
pad_w
%(name)
s = CudaNdarray_HOST_DIMS(
%(
param1
)
s)[3] - 1;
} else {
} else {
PyErr_SetString(PyExc_ValueError, "bad border mode");
PyErr_SetString(PyExc_ValueError, "bad border mode");
%(fail)
s
%(fail)
s
}
}
err
%(name)
s = cudnnSetConvolutionDescriptor(
err
%(name)
s = cudnnSetConvolutionDescriptor(
op
%(id)
d,
input
%(id)
d, kerns
%(id)
d,
op
%(id)
d,
param0_
%(id)
d, param1_
%(id)
d,
pad_h
%(name)
s,
pad_h
%(name)
s,
pad_w
%(name)
s,
pad_w
%(name)
s,
1, 1, 1, 1,
1, 1, 1, 1,
CUDNN_CONVOLUTION
%(conv_flag)
s
);
);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor:
%%
s",
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor:
%%
s",
...
@@ -169,12 +216,12 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
...
@@ -169,12 +216,12 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
{
{
int out_dims[4];
int out_dims[4];
err
%(name)
s = cudnnGetOutputTensor4dDim(
err
%(name)
s = cudnnGetOutputTensor4dDim(
op
%(id)
d,
CUDNN_CONVOLUTION_FWD
,
op
%(id)
d,
%(path)
s
,
&out_dims[0], &out_dims[1],
&out_dims[0], &out_dims[1],
&out_dims[2], &out_dims[3]
&out_dims[2], &out_dims[3]
);
);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not
set op descriptor
:
%%
s",
PyErr_Format(PyExc_RuntimeError, "could not
get output sizes
:
%%
s",
cudnnGetErrorString(err
%(name)
s));
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
%(fail)
s
}
}
...
@@ -182,28 +229,15 @@ if (CudaNdarray_prep_output(&%(out)s, 4, out_dims) != 0) {
...
@@ -182,28 +229,15 @@ if (CudaNdarray_prep_output(&%(out)s, 4, out_dims) != 0) {
%(fail)
s
%(fail)
s
}
}
}
}
err
%(name)
s = cudnnSetTensor4dDescriptorEx(
output
%(id)
d, CUDNN_DATA_FLOAT,
%(set_out)
s
CudaNdarray_HOST_DIMS(
%(out)
s)[0],
CudaNdarray_HOST_DIMS(
%(out)
s)[1],
err
%(name)
s =
%(method)
s(
CudaNdarray_HOST_DIMS(
%(out)
s)[2],
_handle,
CudaNdarray_HOST_DIMS(
%(out)
s)[3],
param0_
%(id)
d, CudaNdarray_DEV_DATA(
%(param0)
s),
CudaNdarray_HOST_STRIDES(
%(out)
s)[0],
param1_
%(id)
d, CudaNdarray_DEV_DATA(
%(param1)
s),
CudaNdarray_HOST_STRIDES(
%(out)
s)[1],
CudaNdarray_HOST_STRIDES(
%(out)
s)[2],
CudaNdarray_HOST_STRIDES(
%(out)
s)[3]
);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set out descriptor:
%%
s",
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
err
%(name)
s = cudnnConvolutionForward(
handle
%(id)
d,
input
%(id)
d, CudaNdarray_DEV_DATA(
%(img)
s),
kerns
%(id)
d, CudaNdarray_DEV_DATA(
%(kerns)
s),
op
%(id)
d,
op
%(id)
d,
output
%(id)
d, CudaNdarray_DEV_DATA(
%(out)
s),
param2_
%(id)
d, CudaNdarray_DEV_DATA(
%(out)
s),
CUDNN_RESULT_NO_ACCUMULATE
CUDNN_RESULT_NO_ACCUMULATE
);
);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
...
@@ -211,11 +245,20 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
...
@@ -211,11 +245,20 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
cudnnGetErrorString(err
%(name)
s));
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
%(fail)
s
}
}
"""
%
dict
(
img
=
img
,
kerns
=
kern
,
out
=
out
,
bmode
=
bmode
,
"""
%
dict
(
param0
=
param0
,
param1
=
param1
,
out
=
out
,
bmode
=
bmode
,
fail
=
sub
[
'fail'
],
id
=
sub
[
'struct_id'
],
name
=
name
)
conv_flag
=
conv_flag
,
fail
=
sub
[
'fail'
],
id
=
sub
[
'struct_id'
],
name
=
name
,
checks
=
'
\n
'
.
join
(
checks
),
sets
=
'
\n
'
.
join
(
sets
),
set_out
=
set_out
,
method
=
self
.
conv_op
,
path
=
self
.
path_flag
)
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
4
,)
return
(
5
,)
class
GpuDnnConv
(
GpuDnnConvBase
):
descriptors
=
(
'tensor4d'
,
'filter'
,
'tensor4d'
)
path_flag
=
'CUDNN_CONVOLUTION_FWD'
conv_op
=
'cudnnConvolutionForward'
from
theano.sandbox.cuda.opt
import
(
local_optimizer
,
gpu_contiguous
,
from
theano.sandbox.cuda.opt
import
(
local_optimizer
,
gpu_contiguous
,
...
...
theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
浏览文件 @
740295e2
...
@@ -4,7 +4,7 @@ Tests for GPU convolution
...
@@ -4,7 +4,7 @@ Tests for GPU convolution
import
sys
import
sys
import
time
import
time
import
unittest
import
unittest
import
traceback
import
numpy
import
numpy
...
@@ -286,7 +286,7 @@ def exec_conv(version, shapes, verbose, random, mode,
...
@@ -286,7 +286,7 @@ def exec_conv(version, shapes, verbose, random, mode,
cls
=
cls
)
cls
=
cls
)
except
Exception
,
e
:
except
Exception
,
e
:
print
ver
,
id
,
(
ishape
,
kshape
,
subshape
,
istride
,
kstride
)
print
ver
,
id
,
(
ishape
,
kshape
,
subshape
,
istride
,
kstride
)
print
"Exception"
,
type
(
e
),
e
print
traceback
.
format_exc
()
pass
pass
if
not
ret
:
if
not
ret
:
failed_version
.
add
(
ver
)
failed_version
.
add
(
ver
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论