Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
6117f98b
提交
6117f98b
authored
6月 18, 2015
作者:
Nicolas Ballas
提交者:
--global
8月 04, 2015
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add cudnnv3 conv3d
上级
34223240
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
7 个修改的文件
包含
164 行增加
和
27 行删除
+164
-27
dnn.py
theano/sandbox/cuda/dnn.py
+0
-0
dnn_base.c
theano/sandbox/cuda/dnn_base.c
+54
-0
dnn_conv_base.c
theano/sandbox/cuda/dnn_conv_base.c
+6
-6
dnn_fwd.c
theano/sandbox/cuda/dnn_fwd.c
+26
-11
dnn_gi.c
theano/sandbox/cuda/dnn_gi.c
+13
-5
dnn_gw.c
theano/sandbox/cuda/dnn_gw.c
+12
-5
test_dnn.py
theano/sandbox/cuda/tests/test_dnn.py
+53
-0
没有找到文件。
theano/sandbox/cuda/dnn.py
浏览文件 @
6117f98b
差异被折叠。
点击展开。
theano/sandbox/cuda/dnn_base.c
浏览文件 @
6117f98b
...
...
@@ -33,6 +33,60 @@ c_set_tensor4d(CudaNdarray *var, cudnnTensorDescriptor_t desc) {
return
0
;
}
static
int
c_set_tensorNd
(
CudaNdarray
*
var
,
int
dim
,
cudnnTensorDescriptor_t
desc
)
{
int
strides
[
dim
];
for
(
int
i
=
0
;
i
<
dim
;
++
i
)
{
if
(
CudaNdarray_HOST_STRIDES
(
var
)[
i
])
strides
[
i
]
=
CudaNdarray_HOST_STRIDES
(
var
)[
i
];
else
{
strides
[
i
]
=
1
;
for
(
int
j
=
i
+
1
;
j
<
dim
;
++
j
)
strides
[
i
]
*=
CudaNdarray_HOST_DIMS
(
var
)[
j
];
}
}
cudnnStatus_t
err
=
cudnnSetTensorNdDescriptor
(
desc
,
CUDNN_DATA_FLOAT
,
dim
,
CudaNdarray_HOST_DIMS
(
var
),
strides
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Could not set tensorNd descriptor: %s"
"dim=%d"
,
cudnnGetErrorString
(
err
),
dim
);
return
-
1
;
}
return
0
;
}
static
int
c_set_filterNd
(
CudaNdarray
*
var
,
int
dim
,
cudnnFilterDescriptor_t
desc
)
{
if
(
!
CudaNdarray_is_c_contiguous
(
var
))
{
PyErr_SetString
(
PyExc_ValueError
,
"Only contiguous filters (kernels) are supported."
);
return
-
1
;
}
cudnnStatus_t
err
=
cudnnSetFilterNdDescriptor
(
desc
,
CUDNN_DATA_FLOAT
,
dim
,
CudaNdarray_HOST_DIMS
(
var
));
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Could not set filter descriptor: %s."
" dims= %d"
,
cudnnGetErrorString
(
err
),
dim
);
return
-
1
;
}
return
0
;
}
static
int
c_set_filter
(
CudaNdarray
*
var
,
cudnnFilterDescriptor_t
desc
)
{
if
(
!
CudaNdarray_is_c_contiguous
(
var
))
{
...
...
theano/sandbox/cuda/dnn_conv_base.c
浏览文件 @
6117f98b
...
...
@@ -7,9 +7,9 @@ cudnnFilterDescriptor_t APPLY_SPECIFIC(kerns);
and the algorithms, if any, that were selected according to these dimensions
and according to the amount of memory available at that time.
*/
int
APPLY_SPECIFIC
(
previous_input_shape
)[
4
];
int
APPLY_SPECIFIC
(
previous_kerns_shape
)[
4
];
int
APPLY_SPECIFIC
(
previous_output_shape
)[
4
];
int
APPLY_SPECIFIC
(
previous_input_shape
)[
5
];
int
APPLY_SPECIFIC
(
previous_kerns_shape
)[
5
];
int
APPLY_SPECIFIC
(
previous_output_shape
)[
5
];
cudnnConvolutionFwdAlgo_t
APPLY_SPECIFIC
(
previous_algo
);
cudnnConvolutionBwdFilterAlgo_t
APPLY_SPECIFIC
(
previous_bwd_f_algo
);
cudnnConvolutionBwdDataAlgo_t
APPLY_SPECIFIC
(
previous_bwd_d_algo
);
...
...
@@ -21,12 +21,12 @@ APPLY_SPECIFIC(input) = NULL;
APPLY_SPECIFIC
(
output
)
=
NULL
;
APPLY_SPECIFIC
(
kerns
)
=
NULL
;
if
((
APPLY_SPECIFIC
(
err
)
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
input
)))
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"could not allocate tensor
4d
descriptor "
PyErr_Format
(
PyExc_MemoryError
,
"could not allocate tensor descriptor "
"(inp): %s"
,
cudnnGetErrorString
(
APPLY_SPECIFIC
(
err
)));
FAIL
;
}
if
((
APPLY_SPECIFIC
(
err
)
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
output
)))
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"could not allocate tensor
4d
descriptor "
PyErr_Format
(
PyExc_MemoryError
,
"could not allocate tensor descriptor "
"(out): %s"
,
cudnnGetErrorString
(
APPLY_SPECIFIC
(
err
)));
FAIL
;
}
...
...
@@ -36,7 +36,7 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateFilterDescriptor(&APPLY_SPECIFIC(kerns)))
FAIL
;
}
for
(
int
i
=
0
;
i
<
4
;
i
++
)
for
(
int
i
=
0
;
i
<
5
;
i
++
)
{
APPLY_SPECIFIC
(
previous_input_shape
)[
i
]
=
0
;
APPLY_SPECIFIC
(
previous_kerns_shape
)[
i
]
=
0
;
...
...
theano/sandbox/cuda/dnn_fwd.c
浏览文件 @
6117f98b
...
...
@@ -3,7 +3,8 @@
int
APPLY_SPECIFIC
(
conv_fwd
)(
CudaNdarray
*
input
,
CudaNdarray
*
kerns
,
CudaNdarray
*
om
,
cudnnConvolutionDescriptor_t
desc
,
float
alpha
,
float
beta
,
CudaNdarray
**
output
)
{
float
alpha
,
float
beta
,
int
nb_dim
,
CudaNdarray
**
output
)
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
CudaNdarray_HOST_DIMS
(
input
)[
1
]
!=
CudaNdarray_HOST_DIMS
(
kerns
)[
1
])
{
PyErr_SetString
(
PyExc_ValueError
,
...
...
@@ -11,37 +12,49 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
return
1
;
}
if
(
c_set_tensor
4d
(
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
if
(
c_set_tensor
Nd
(
input
,
nb_dim
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
Nd
(
kerns
,
nb_dim
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
/* if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1) */
/* return 1; */
/* if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) */
/* return 1; */
#ifdef CONV_INPLACE
Py_XDECREF
(
*
output
);
*
output
=
om
;
Py_INCREF
(
*
output
);
#else
if
(
CudaNdarray_prep_output
(
output
,
4
,
CudaNdarray_HOST_DIMS
(
om
))
!=
0
)
if
(
CudaNdarray_prep_output
(
output
,
nb_dim
,
CudaNdarray_HOST_DIMS
(
om
))
!=
0
)
return
1
;
if
(
beta
!=
0
.
0
&&
CudaNdarray_CopyFromCudaNdarray
(
*
output
,
om
))
return
1
;
#endif
if
(
c_set_tensor4d
(
*
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
if
(
c_set_tensorNd
(
*
output
,
nb_dim
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
/* if (c_set_tensor4d(*output, APPLY_SPECIFIC(output)) == -1) */
/* return 1; */
{
size_t
worksize
;
void
*
workspace
;
cudnnConvolutionFwdAlgo_t
chosen_algo
;
for
(
int
i
=
0
;
(
i
<
nb_dim
);
i
++
)
std
::
cout
<<
i
<<
"/"
<<
nb_dim
<<
", "
<<
CudaNdarray_HOST_DIMS
(
input
)[
i
]
<<
", "
<<
CudaNdarray_HOST_DIMS
(
kerns
)[
i
]
<<
std
::
endl
;
if
(
CHOOSE_ALGO
)
{
// Check if the input and the kernels have the same shape as they have
// last time the apply node was executed
bool
same_shapes
=
true
;
for
(
int
i
=
0
;
(
i
<
4
)
&&
same_shapes
;
i
++
)
for
(
int
i
=
0
;
(
i
<
nb_dim
)
&&
same_shapes
;
i
++
)
{
same_shapes
&=
(
CudaNdarray_HOST_DIMS
(
input
)[
i
]
!=
APPLY_SPECIFIC
(
previous_input_shape
)[
i
]);
...
...
@@ -115,7 +128,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
// Store the shapes of the inputs and kernels as well as the chosen
// algorithm for future use.
APPLY_SPECIFIC
(
previous_algo
)
=
chosen_algo
;
for
(
int
i
=
0
;
i
<
4
;
i
++
)
for
(
int
i
=
0
;
i
<
nb_dim
;
i
++
)
{
APPLY_SPECIFIC
(
previous_input_shape
)[
i
]
=
CudaNdarray_HOST_DIMS
(
input
)[
i
];
...
...
@@ -142,7 +155,8 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
// If the chosen implementation is FFT, validate that it can be used
// on the current data and default on a safe implementation if it
// can't.
if
(
chosen_algo
==
CUDNN_CONVOLUTION_FWD_ALGO_FFT
)
// Following code is 2d-specific, but it is fine as ftt is define only for 2d-filters
if
(
chosen_algo
==
CUDNN_CONVOLUTION_FWD_ALGO_FFT
&&
nb_dim
==
4
)
{
// Extract the properties of the convolution descriptor
...
...
@@ -186,12 +200,12 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
chosen_algo
,
&
worksize
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
std
::
cout
<<
"here"
<<
std
::
endl
;
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv: error getting worksize: %s"
,
cudnnGetErrorString
(
err
));
cudnnGetErrorString
(
err
));
return
1
;
}
workspace
=
get_work_mem
(
worksize
);
if
(
workspace
==
NULL
&&
worksize
!=
0
)
return
1
;
...
...
@@ -208,6 +222,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
APPLY_SPECIFIC
(
output
),
CudaNdarray_DEV_DATA
(
*
output
));
}
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
std
::
cout
<<
"here2"
<<
std
::
endl
;
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv: error doing operation: %s"
,
cudnnGetErrorString
(
err
));
return
1
;
...
...
theano/sandbox/cuda/dnn_gi.c
浏览文件 @
6117f98b
...
...
@@ -3,7 +3,7 @@
int
APPLY_SPECIFIC
(
conv_gi
)(
CudaNdarray
*
kerns
,
CudaNdarray
*
output
,
CudaNdarray
*
im
,
cudnnConvolutionDescriptor_t
desc
,
float
alpha
,
float
beta
,
CudaNdarray
**
input
)
{
float
alpha
,
float
beta
,
int
nb_dim
,
CudaNdarray
**
input
)
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
CudaNdarray_HOST_DIMS
(
im
)[
1
]
!=
CudaNdarray_HOST_DIMS
(
kerns
)[
1
])
{
...
...
@@ -12,9 +12,14 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return
1
;
}
if
(
c_set_tensor4d
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
/* if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1) */
/* return 1; */
/* if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) */
/* return 1; */
if
(
c_set_tensorNd
(
output
,
nb_dim
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
Nd
(
kerns
,
nb_dim
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
#ifdef CONV_INPLACE
...
...
@@ -22,13 +27,16 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
*
input
=
im
;
Py_INCREF
(
*
input
);
#else
if
(
CudaNdarray_prep_output
(
input
,
4
,
CudaNdarray_HOST_DIMS
(
im
))
!=
0
)
if
(
CudaNdarray_prep_output
(
input
,
nb_dim
,
CudaNdarray_HOST_DIMS
(
im
))
!=
0
)
return
1
;
if
(
beta
!=
0
.
0
&&
CudaNdarray_CopyFromCudaNdarray
(
*
input
,
im
))
return
1
;
#endif
if
(
c_set_tensor4d
(
*
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
/* if (c_set_tensor4d(*input, APPLY_SPECIFIC(input)) == -1) */
/* return 1; */
if
(
c_set_tensorNd
(
*
input
,
nb_dim
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
{
...
...
theano/sandbox/cuda/dnn_gw.c
浏览文件 @
6117f98b
...
...
@@ -3,7 +3,7 @@
int
APPLY_SPECIFIC
(
conv_gw
)(
CudaNdarray
*
input
,
CudaNdarray
*
output
,
CudaNdarray
*
km
,
cudnnConvolutionDescriptor_t
desc
,
float
alpha
,
float
beta
,
CudaNdarray
**
kerns
)
{
float
alpha
,
float
beta
,
int
nb_dim
,
CudaNdarray
**
kerns
)
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
CudaNdarray_HOST_DIMS
(
input
)[
1
]
!=
CudaNdarray_HOST_DIMS
(
km
)[
1
])
{
...
...
@@ -12,9 +12,14 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return
1
;
}
if
(
c_set_tensor4d
(
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
/* if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1) */
/* return 1; */
/* if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1) */
/* return 1; */
if
(
c_set_tensorNd
(
input
,
nb_dim
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
if
(
c_set_tensor
4d
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
if
(
c_set_tensor
Nd
(
output
,
nb_dim
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
#ifdef CONV_INPLACE
...
...
@@ -22,13 +27,15 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
*
kerns
=
km
;
Py_INCREF
(
*
kerns
);
#else
if
(
CudaNdarray_prep_output
(
kerns
,
4
,
CudaNdarray_HOST_DIMS
(
km
))
!=
0
)
if
(
CudaNdarray_prep_output
(
kerns
,
nb_dim
,
CudaNdarray_HOST_DIMS
(
km
))
!=
0
)
return
1
;
if
(
beta
!=
0
.
0
&&
CudaNdarray_CopyFromCudaNdarray
(
*
kerns
,
km
))
return
1
;
#endif
if
(
c_set_filter
(
*
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
/* if (c_set_filter(*kerns, APPLY_SPECIFIC(kerns)) == -1) */
/* return 1; */
if
(
c_set_filterNd
(
*
kerns
,
nb_dim
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
{
...
...
theano/sandbox/cuda/tests/test_dnn.py
浏览文件 @
6117f98b
...
...
@@ -13,6 +13,7 @@ from theano.tensor.signal.downsample import max_pool_2d
from
theano.tensor.signal.downsample
import
DownsampleFactorMaxGrad
import
theano.sandbox.cuda.dnn
as
dnn
from
theano.sandbox.cuda.basic_ops
import
GpuAllocEmpty
,
gpu_alloc_empty
from
theano.sandbox.cuda
import
float32_shared_constructor
as
shared
# Skip test if cuda_ndarray is not available.
import
theano.sandbox.cuda
as
cuda
...
...
@@ -763,6 +764,58 @@ def test_dnn_conv_grad():
utt
.
verify_grad
(
dconvw
,
[
img_val
,
kern_val
,
out_val
])
def
test_conv3d_valid
():
print
dnn
.
version
()
if
not
cuda
.
dnn
.
dnn_available
():
raise
SkipTest
(
'"3D conv not supported in cudnn v1'
)
def
run_conv3d_valid
(
inputs_shape
,
filters_shape
,
subsample
=
(
1
,
1
,
1
)):
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
'float32'
)
inputs
=
shared
(
inputs_val
)
filters
=
shared
(
filters_val
)
bias
=
shared
(
numpy
.
zeros
(
filters_shape
[
0
])
.
astype
(
'float32'
))
conv_ref
=
theano
.
tensor
.
nnet
.
conv3D
(
V
=
inputs
.
dimshuffle
(
0
,
2
,
3
,
4
,
1
),
W
=
filters
.
dimshuffle
(
0
,
2
,
3
,
4
,
1
),
b
=
bias
,
d
=
subsample
)
conv
=
dnn
.
dnn_conv3d
(
img
=
inputs
,
kerns
=
filters
,
border_mode
=
"valid"
,
subsample
=
subsample
,
conv_mode
=
'cross'
)
f_ref
=
theano
.
function
([],
conv_ref
.
dimshuffle
(
0
,
4
,
1
,
2
,
3
))
f
=
theano
.
function
([],
conv
,
mode
=
mode_with_gpu
)
res_ref
=
f_ref
()
res
=
f
()
print
res_ref
.
shape
,
res
.
shape
utt
.
assert_allclose
(
res_ref
,
res
)
run_conv3d_valid
(
inputs_shape
=
(
128
,
3
,
5
,
5
,
5
),
filters_shape
=
(
64
,
3
,
1
,
2
,
4
))
run_conv3d_valid
(
inputs_shape
=
(
16
,
4
,
20
,
12
,
15
),
filters_shape
=
(
10
,
4
,
6
,
12
,
4
),
subsample
=
(
2
,
2
,
2
))
run_conv3d_valid
(
inputs_shape
=
(
16
,
4
,
20
,
12
,
15
),
filters_shape
=
(
10
,
4
,
6
,
12
,
4
),
subsample
=
(
2
,
2
,
2
))
run_conv3d_valid
(
inputs_shape
=
(
16
,
1
,
20
,
12
,
15
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
subsample
=
(
3
,
3
,
3
))
run_conv3d_valid
(
inputs_shape
=
(
16
,
2
,
20
,
12
,
15
),
filters_shape
=
(
10
,
2
,
6
,
12
,
4
),
subsample
=
(
3
,
3
,
3
))
run_conv3d_valid
(
inputs_shape
=
(
16
,
1
,
20
,
12
,
15
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
subsample
=
(
3
,
2
,
1
))
run_conv3d_valid
(
inputs_shape
=
(
16
,
1
,
20
,
12
,
15
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
subsample
=
(
1
,
2
,
3
))
def
test_version
():
if
not
cuda
.
dnn
.
dnn_available
():
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论