Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
7f78fce1
提交
7f78fce1
authored
6月 18, 2015
作者:
Nicolas Ballas
提交者:
--global
8月 04, 2015
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
make cudnn conv3d gradI and gradW works
上级
6117f98b
显示空白字符变更
内嵌
并排
正在显示
6 个修改的文件
包含
112 行增加
和
185 行删除
+112
-185
dnn.py
theano/sandbox/cuda/dnn.py
+4
-88
dnn_base.c
theano/sandbox/cuda/dnn_base.c
+0
-62
dnn_fwd.c
theano/sandbox/cuda/dnn_fwd.c
+0
-13
dnn_gi.c
theano/sandbox/cuda/dnn_gi.c
+0
-8
dnn_gw.c
theano/sandbox/cuda/dnn_gw.c
+2
-2
test_dnn.py
theano/sandbox/cuda/tests/test_dnn.py
+106
-12
没有找到文件。
theano/sandbox/cuda/dnn.py
浏览文件 @
7f78fce1
...
@@ -908,7 +908,7 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW):
...
@@ -908,7 +908,7 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW):
def
__init__
(
self
,
inplace
=
False
,
workmem
=
None
):
def
__init__
(
self
,
inplace
=
False
,
workmem
=
None
):
### Only workmem = 'none' work with cudnn conv 3d
### Only workmem = 'none' work with cudnn conv 3d
super
(
GpuDnnConv3dGradW
,
self
)
.
__init
(
inplace
=
inplace
,
workmem
=
'none'
)
super
(
GpuDnnConv3dGradW
,
self
)
.
__init
__
(
inplace
=
inplace
,
workmem
=
'none'
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
img
,
top
,
output
,
desc
,
alpha
,
beta
,
nb_dim
=
inp
img
,
top
,
output
,
desc
,
alpha
,
beta
,
nb_dim
=
inp
...
@@ -1051,7 +1051,7 @@ class GpuDnnConvGradI(DnnBase, COp):
...
@@ -1051,7 +1051,7 @@ class GpuDnnConvGradI(DnnBase, COp):
class
GpuDnnConv
Grad3
dI
(
GpuDnnConvGradI
):
class
GpuDnnConv
3dGra
dI
(
GpuDnnConvGradI
):
"""
"""
The convolution gradient with respect to the inputs.
The convolution gradient with respect to the inputs.
...
@@ -1065,7 +1065,7 @@ class GpuDnnConvGrad3dI(GpuDnnConvGradI):
...
@@ -1065,7 +1065,7 @@ class GpuDnnConvGrad3dI(GpuDnnConvGradI):
'descriptor'
,
'alpha'
,
'beta'
)
'descriptor'
,
'alpha'
,
'beta'
)
def
__init__
(
self
,
inplace
=
False
):
def
__init__
(
self
,
inplace
=
False
):
super
(
GpuDnnConvGradI
,
self
)
.
__init__
(
inplace
)
super
(
GpuDnnConv
3d
GradI
,
self
)
.
__init__
(
inplace
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
...
@@ -1091,7 +1091,7 @@ class GpuDnnConvGrad3dI(GpuDnnConvGradI):
...
@@ -1091,7 +1091,7 @@ class GpuDnnConvGrad3dI(GpuDnnConvGradI):
raise
TypeError
(
'kern must be 5D tensor'
)
raise
TypeError
(
'kern must be 5D tensor'
)
if
topgrad
.
type
.
ndim
!=
5
:
if
topgrad
.
type
.
ndim
!=
5
:
raise
TypeError
(
'topgrad must be 5D tensor'
)
raise
TypeError
(
'topgrad must be 5D tensor'
)
if
output
.
type
.
ndim
!=
4
:
if
output
.
type
.
ndim
!=
5
:
raise
TypeError
(
'output must be 5D tensor'
)
raise
TypeError
(
'output must be 5D tensor'
)
if
not
isinstance
(
desc
.
type
,
CDataType
)
\
if
not
isinstance
(
desc
.
type
,
CDataType
)
\
...
@@ -1107,90 +1107,6 @@ class GpuDnnConvGrad3dI(GpuDnnConvGradI):
...
@@ -1107,90 +1107,6 @@ class GpuDnnConvGrad3dI(GpuDnnConvGradI):
def
dnn_conv
(
img
,
kerns
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
conv_mode
=
'conv'
,
direction_hint
=
None
,
workmem
=
None
):
"""
GPU convolution using cuDNN from NVIDIA.
The memory layout to use is 'bc01', that is 'batch', 'channel',
'first dim', 'second dim' in that order.
:param img: images to do the convolution over
:param kerns: convolution filters
:param border_mode: one of 'valid', 'full'; additionally, the padding size
could be directly specified by an integer or a pair of integers
:param subsample: perform subsampling of the output (default: (1, 1))
:param conv_mode: perform convolution (kernels flipped) or cross-correlation.
One of 'conv', 'cross'. (default: 'conv')
:param direction_hint: Used by graph optimizers to change algorithm choice.
By default, GpuDnnConv will be used to carry out the convolution.
If border_mode is 'valid', subsample is (1,1) and direction_hint is
'bprop weights', it will use GpuDnnConvGradW.
If border_mode is 'full', subsample is (1,1) and direction_hint is
*not* 'forward!', it will use GpuDnnConvGradI.
This parameter is used internally by graph optimizers and may be
removed at any time without a deprecation period. You have been warned.
:param workmem: Specify the amount of working memory allowed.
More memory is usually faster. One of 'none', 'small' or
'large'. (default is None which takes its value from
:attr:`config.dnn.conv.workmem`)
:warning: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not
work with this Op.
"""
fgraph
=
getattr
(
img
,
'fgraph'
,
None
)
or
getattr
(
kerns
,
'fgraph'
,
None
)
if
(
border_mode
==
'valid'
and
subsample
==
(
1
,
1
)
and
direction_hint
==
'bprop weights'
):
# Special case: We are asked to use GpuDnnConvGradW. We need to set
# up a suitable 'fake' convolution to compute the gradient for.
img
=
gpu_contiguous
(
img
.
dimshuffle
(
1
,
0
,
2
,
3
))
if
conv_mode
==
'conv'
:
# We need to flip manually. These 'kerns' are not the kernels
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
kerns
=
kerns
[:,
:,
::
-
1
,
::
-
1
]
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
))
shape2
=
shape_i
(
img
,
2
,
fgraph
)
-
shape_i
(
kerns
,
2
,
fgraph
)
+
1
shape3
=
shape_i
(
img
,
3
,
fgraph
)
-
shape_i
(
kerns
,
3
,
fgraph
)
+
1
out
=
gpu_alloc_empty
(
shape_i
(
kerns
,
1
,
fgraph
),
shape_i
(
img
,
1
,
fgraph
),
shape2
,
shape3
)
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
conv_mode
=
'cross'
)(
img
.
shape
,
out
.
shape
)
conv
=
GpuDnnConvGradW
()(
img
,
kerns
,
out
,
desc
)
return
as_cuda_ndarray_variable
(
conv
.
dimshuffle
(
1
,
0
,
2
,
3
))
elif
(
border_mode
==
'full'
and
subsample
==
(
1
,
1
)
and
direction_hint
!=
'forward!'
):
# Special case: We can be faster by using GpuDnnConvGradI to compute
# the full convolution as the backward pass of a valid convolution.
# We just need to set up a suitable 'fake' valid convolution.
img
=
gpu_contiguous
(
img
)
# cudnn v1 and v2 rc3 need contiguous data
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
))
conv_mode
=
'cross'
if
conv_mode
==
'conv'
else
'conv'
shape2
=
shape_i
(
img
,
2
,
fgraph
)
+
shape_i
(
kerns
,
2
,
fgraph
)
-
1
shape3
=
shape_i
(
img
,
3
,
fgraph
)
+
shape_i
(
kerns
,
3
,
fgraph
)
-
1
out
=
gpu_alloc_empty
(
shape_i
(
img
,
0
,
fgraph
),
shape_i
(
kerns
,
1
,
fgraph
),
shape2
,
shape3
)
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
conv_mode
=
conv_mode
)(
out
.
shape
,
kerns
.
shape
)
return
GpuDnnConvGradI
()(
kerns
,
img
,
out
,
desc
)
# Standard case: We use GpuDnnConv with suitable padding.
# contig_version will return a gpu_contiguous copy
# if the img contains negative strides
img
=
gpu_contiguous
(
img
)
kerns
=
gpu_contiguous
(
kerns
)
desc
=
GpuDnnConvDesc
(
border_mode
=
border_mode
,
subsample
=
subsample
,
conv_mode
=
conv_mode
)(
img
.
shape
,
kerns
.
shape
)
desc_op
=
desc
.
owner
.
op
out_shp
=
GpuDnnConv
.
get_out_shape
(
img
.
shape
,
kerns
.
shape
,
desc_op
.
border_mode
,
desc_op
.
subsample
)
out
=
gpu_alloc_empty
(
*
out_shp
)
return
GpuDnnConv
(
workmem
=
workmem
)(
img
,
kerns
,
out
,
desc
)
def
dnn_conv
(
img
,
kerns
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
def
dnn_conv
(
img
,
kerns
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
conv_mode
=
'conv'
,
direction_hint
=
None
,
workmem
=
None
):
conv_mode
=
'conv'
,
direction_hint
=
None
,
workmem
=
None
):
"""
"""
...
...
theano/sandbox/cuda/dnn_base.c
浏览文件 @
7f78fce1
#section support_code
#section support_code
static
cudnnHandle_t
_handle
=
NULL
;
static
cudnnHandle_t
_handle
=
NULL
;
static
int
c_set_tensor4d
(
CudaNdarray
*
var
,
cudnnTensorDescriptor_t
desc
)
{
cudnnStatus_t
err
=
cudnnSetTensor4dDescriptorEx
(
desc
,
CUDNN_DATA_FLOAT
,
CudaNdarray_HOST_DIMS
(
var
)[
0
],
CudaNdarray_HOST_DIMS
(
var
)[
1
],
CudaNdarray_HOST_DIMS
(
var
)[
2
],
CudaNdarray_HOST_DIMS
(
var
)[
3
],
CudaNdarray_HOST_STRIDES
(
var
)[
0
]
?
CudaNdarray_HOST_STRIDES
(
var
)[
0
]
:
CudaNdarray_HOST_DIMS
(
var
)[
2
]
*
CudaNdarray_HOST_DIMS
(
var
)[
3
]
*
CudaNdarray_HOST_DIMS
(
var
)[
1
],
CudaNdarray_HOST_STRIDES
(
var
)[
1
]
?
CudaNdarray_HOST_STRIDES
(
var
)[
1
]
:
CudaNdarray_HOST_DIMS
(
var
)[
2
]
*
CudaNdarray_HOST_DIMS
(
var
)[
3
],
CudaNdarray_HOST_STRIDES
(
var
)[
2
]
?
CudaNdarray_HOST_STRIDES
(
var
)[
2
]
:
CudaNdarray_HOST_DIMS
(
var
)[
3
],
CudaNdarray_HOST_STRIDES
(
var
)[
3
]
?
CudaNdarray_HOST_STRIDES
(
var
)[
3
]
:
1
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Could not set tensor4d descriptor: %s"
"shapes=%d %d %d %d strides=%d %d %d %d"
,
cudnnGetErrorString
(
err
),
CudaNdarray_HOST_DIMS
(
var
)[
0
],
CudaNdarray_HOST_DIMS
(
var
)[
1
],
CudaNdarray_HOST_DIMS
(
var
)[
2
],
CudaNdarray_HOST_DIMS
(
var
)[
3
],
CudaNdarray_HOST_STRIDES
(
var
)[
0
]
?
CudaNdarray_HOST_STRIDES
(
var
)[
0
]
:
CudaNdarray_HOST_DIMS
(
var
)[
2
]
*
CudaNdarray_HOST_DIMS
(
var
)[
3
]
*
CudaNdarray_HOST_DIMS
(
var
)[
1
],
CudaNdarray_HOST_STRIDES
(
var
)[
1
]
?
CudaNdarray_HOST_STRIDES
(
var
)[
1
]
:
CudaNdarray_HOST_DIMS
(
var
)[
2
]
*
CudaNdarray_HOST_DIMS
(
var
)[
3
],
CudaNdarray_HOST_STRIDES
(
var
)[
2
]
?
CudaNdarray_HOST_STRIDES
(
var
)[
2
]
:
CudaNdarray_HOST_DIMS
(
var
)[
3
],
CudaNdarray_HOST_STRIDES
(
var
)[
3
]
?
CudaNdarray_HOST_STRIDES
(
var
)[
3
]
:
1
);
return
-
1
;
}
return
0
;
}
static
int
static
int
c_set_tensorNd
(
CudaNdarray
*
var
,
int
dim
,
cudnnTensorDescriptor_t
desc
)
{
c_set_tensorNd
(
CudaNdarray
*
var
,
int
dim
,
cudnnTensorDescriptor_t
desc
)
{
...
@@ -85,36 +53,6 @@ c_set_filterNd(CudaNdarray *var, int dim, cudnnFilterDescriptor_t desc) {
...
@@ -85,36 +53,6 @@ c_set_filterNd(CudaNdarray *var, int dim, cudnnFilterDescriptor_t desc) {
return
0
;
return
0
;
}
}
static
int
c_set_filter
(
CudaNdarray
*
var
,
cudnnFilterDescriptor_t
desc
)
{
if
(
!
CudaNdarray_is_c_contiguous
(
var
))
{
PyErr_SetString
(
PyExc_ValueError
,
"Only contiguous filters (kernels) are supported."
);
return
-
1
;
}
cudnnStatus_t
err
=
cudnnSetFilter4dDescriptor
(
desc
,
CUDNN_DATA_FLOAT
,
CudaNdarray_HOST_DIMS
(
var
)[
0
],
CudaNdarray_HOST_DIMS
(
var
)[
1
],
CudaNdarray_HOST_DIMS
(
var
)[
2
],
CudaNdarray_HOST_DIMS
(
var
)[
3
]
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Could not set filter descriptor: %s."
" dims= %d %d %d %d"
,
cudnnGetErrorString
(
err
),
CudaNdarray_HOST_DIMS
(
var
)[
0
],
CudaNdarray_HOST_DIMS
(
var
)[
1
],
CudaNdarray_HOST_DIMS
(
var
)[
2
],
CudaNdarray_HOST_DIMS
(
var
)[
3
]);
return
-
1
;
}
return
0
;
}
#section init_code
#section init_code
{
{
...
...
theano/sandbox/cuda/dnn_fwd.c
浏览文件 @
7f78fce1
...
@@ -16,10 +16,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -16,10 +16,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
return
1
;
return
1
;
if
(
c_set_filterNd
(
kerns
,
nb_dim
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filterNd
(
kerns
,
nb_dim
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
return
1
;
/* if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1) */
/* return 1; */
/* if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) */
/* return 1; */
#ifdef CONV_INPLACE
#ifdef CONV_INPLACE
Py_XDECREF
(
*
output
);
Py_XDECREF
(
*
output
);
...
@@ -35,19 +31,11 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -35,19 +31,11 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
if
(
c_set_tensorNd
(
*
output
,
nb_dim
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
if
(
c_set_tensorNd
(
*
output
,
nb_dim
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
return
1
;
/* if (c_set_tensor4d(*output, APPLY_SPECIFIC(output)) == -1) */
/* return 1; */
{
{
size_t
worksize
;
size_t
worksize
;
void
*
workspace
;
void
*
workspace
;
cudnnConvolutionFwdAlgo_t
chosen_algo
;
cudnnConvolutionFwdAlgo_t
chosen_algo
;
for
(
int
i
=
0
;
(
i
<
nb_dim
);
i
++
)
std
::
cout
<<
i
<<
"/"
<<
nb_dim
<<
", "
<<
CudaNdarray_HOST_DIMS
(
input
)[
i
]
<<
", "
<<
CudaNdarray_HOST_DIMS
(
kerns
)[
i
]
<<
std
::
endl
;
if
(
CHOOSE_ALGO
)
if
(
CHOOSE_ALGO
)
{
{
...
@@ -222,7 +210,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -222,7 +210,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
APPLY_SPECIFIC
(
output
),
CudaNdarray_DEV_DATA
(
*
output
));
APPLY_SPECIFIC
(
output
),
CudaNdarray_DEV_DATA
(
*
output
));
}
}
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
std
::
cout
<<
"here2"
<<
std
::
endl
;
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv: error doing operation: %s"
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv: error doing operation: %s"
,
cudnnGetErrorString
(
err
));
cudnnGetErrorString
(
err
));
return
1
;
return
1
;
...
...
theano/sandbox/cuda/dnn_gi.c
浏览文件 @
7f78fce1
...
@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
...
@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return
1
;
return
1
;
}
}
/* if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1) */
/* return 1; */
/* if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) */
/* return 1; */
if
(
c_set_tensorNd
(
output
,
nb_dim
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
if
(
c_set_tensorNd
(
output
,
nb_dim
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
return
1
;
if
(
c_set_filterNd
(
kerns
,
nb_dim
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filterNd
(
kerns
,
nb_dim
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
...
@@ -33,9 +28,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
...
@@ -33,9 +28,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return
1
;
return
1
;
#endif
#endif
/* if (c_set_tensor4d(*input, APPLY_SPECIFIC(input)) == -1) */
/* return 1; */
if
(
c_set_tensorNd
(
*
input
,
nb_dim
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
if
(
c_set_tensorNd
(
*
input
,
nb_dim
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
return
1
;
...
...
theano/sandbox/cuda/dnn_gw.c
浏览文件 @
7f78fce1
...
@@ -48,7 +48,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
...
@@ -48,7 +48,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
// Check if the input and the output have the same shape as they have
// Check if the input and the output have the same shape as they have
// last time the apply node was executed
// last time the apply node was executed
bool
same_shapes
=
true
;
bool
same_shapes
=
true
;
for
(
int
i
=
0
;
(
i
<
4
)
&&
same_shapes
;
i
++
)
for
(
int
i
=
0
;
(
i
<
nb_dim
)
&&
same_shapes
;
i
++
)
{
{
same_shapes
&=
(
CudaNdarray_HOST_DIMS
(
input
)[
i
]
!=
same_shapes
&=
(
CudaNdarray_HOST_DIMS
(
input
)[
i
]
!=
APPLY_SPECIFIC
(
previous_input_shape
)[
i
]);
APPLY_SPECIFIC
(
previous_input_shape
)[
i
]);
...
@@ -93,7 +93,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
...
@@ -93,7 +93,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
// Store the shapes of the inputs and kernels as well as the chosen
// Store the shapes of the inputs and kernels as well as the chosen
// algorithm for future use.
// algorithm for future use.
APPLY_SPECIFIC
(
previous_bwd_f_algo
)
=
chosen_algo
;
APPLY_SPECIFIC
(
previous_bwd_f_algo
)
=
chosen_algo
;
for
(
int
i
=
0
;
i
<
4
;
i
++
)
for
(
int
i
=
0
;
i
<
nb_dim
;
i
++
)
{
{
APPLY_SPECIFIC
(
previous_input_shape
)[
i
]
=
APPLY_SPECIFIC
(
previous_input_shape
)[
i
]
=
CudaNdarray_HOST_DIMS
(
input
)[
i
];
CudaNdarray_HOST_DIMS
(
input
)[
i
];
...
...
theano/sandbox/cuda/tests/test_dnn.py
浏览文件 @
7f78fce1
...
@@ -764,13 +764,12 @@ def test_dnn_conv_grad():
...
@@ -764,13 +764,12 @@ def test_dnn_conv_grad():
utt
.
verify_grad
(
dconvw
,
[
img_val
,
kern_val
,
out_val
])
utt
.
verify_grad
(
dconvw
,
[
img_val
,
kern_val
,
out_val
])
def
test_conv3d_
vali
d
():
def
test_conv3d_
fw
d
():
print
dnn
.
version
()
if
not
cuda
.
dnn
.
dnn_available
()
and
dnn
.
version
()[
0
]
>=
3000
:
if
not
cuda
.
dnn
.
dnn_available
():
raise
SkipTest
(
'"3D conv not supported in cudnn v1'
)
raise
SkipTest
(
'"3D conv not supported in cudnn v1'
)
def
run_conv3d_
vali
d
(
inputs_shape
,
filters_shape
,
def
run_conv3d_
fw
d
(
inputs_shape
,
filters_shape
,
subsample
=
(
1
,
1
,
1
)):
subsample
=
(
1
,
1
,
1
)):
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
...
@@ -791,31 +790,126 @@ def test_conv3d_valid():
...
@@ -791,31 +790,126 @@ def test_conv3d_valid():
res_ref
=
f_ref
()
res_ref
=
f_ref
()
res
=
f
()
res
=
f
()
print
res_ref
.
shape
,
res
.
shape
utt
.
assert_allclose
(
res_ref
,
res
)
utt
.
assert_allclose
(
res_ref
,
res
)
run_conv3d_
vali
d
(
inputs_shape
=
(
128
,
3
,
5
,
5
,
5
),
run_conv3d_
fw
d
(
inputs_shape
=
(
128
,
3
,
5
,
5
,
5
),
filters_shape
=
(
64
,
3
,
1
,
2
,
4
))
filters_shape
=
(
64
,
3
,
1
,
2
,
4
))
run_conv3d_
vali
d
(
inputs_shape
=
(
16
,
4
,
20
,
12
,
15
),
run_conv3d_
fw
d
(
inputs_shape
=
(
16
,
4
,
20
,
12
,
15
),
filters_shape
=
(
10
,
4
,
6
,
12
,
4
),
filters_shape
=
(
10
,
4
,
6
,
12
,
4
),
subsample
=
(
2
,
2
,
2
))
subsample
=
(
2
,
2
,
2
))
run_conv3d_
vali
d
(
inputs_shape
=
(
16
,
4
,
20
,
12
,
15
),
run_conv3d_
fw
d
(
inputs_shape
=
(
16
,
4
,
20
,
12
,
15
),
filters_shape
=
(
10
,
4
,
6
,
12
,
4
),
filters_shape
=
(
10
,
4
,
6
,
12
,
4
),
subsample
=
(
2
,
2
,
2
))
subsample
=
(
2
,
2
,
2
))
run_conv3d_
vali
d
(
inputs_shape
=
(
16
,
1
,
20
,
12
,
15
),
run_conv3d_
fw
d
(
inputs_shape
=
(
16
,
1
,
20
,
12
,
15
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
subsample
=
(
3
,
3
,
3
))
subsample
=
(
3
,
3
,
3
))
run_conv3d_
vali
d
(
inputs_shape
=
(
16
,
2
,
20
,
12
,
15
),
run_conv3d_
fw
d
(
inputs_shape
=
(
16
,
2
,
20
,
12
,
15
),
filters_shape
=
(
10
,
2
,
6
,
12
,
4
),
filters_shape
=
(
10
,
2
,
6
,
12
,
4
),
subsample
=
(
3
,
3
,
3
))
subsample
=
(
3
,
3
,
3
))
run_conv3d_
vali
d
(
inputs_shape
=
(
16
,
1
,
20
,
12
,
15
),
run_conv3d_
fw
d
(
inputs_shape
=
(
16
,
1
,
20
,
12
,
15
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
subsample
=
(
3
,
2
,
1
))
subsample
=
(
3
,
2
,
1
))
run_conv3d_
vali
d
(
inputs_shape
=
(
16
,
1
,
20
,
12
,
15
),
run_conv3d_
fw
d
(
inputs_shape
=
(
16
,
1
,
20
,
12
,
15
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
subsample
=
(
1
,
2
,
3
))
subsample
=
(
1
,
2
,
3
))
def
test_conv3d_gradweight
():
if
not
cuda
.
dnn
.
dnn_available
()
and
dnn
.
version
()[
0
]
>=
3000
:
raise
SkipTest
(
'"3D conv not supported in cudnn v1'
)
def
run_gradweight
(
inputs_shape
,
filters_shape
,
dCdH_shape
,
subsample
=
(
1
,
1
,
1
)):
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
dCdH_val
=
numpy
.
random
.
random
(
dCdH_shape
)
.
astype
(
'float32'
)
kern_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
'float32'
)
inputs
=
shared
(
inputs_val
)
dCdH
=
shared
(
dCdH_val
)
kern
=
shared
(
kern_val
)
filters_shape_s
=
(
filters_shape
[
0
],
filters_shape
[
2
],
filters_shape
[
3
],
filters_shape
[
4
],
filters_shape
[
1
])
conv
=
theano
.
tensor
.
nnet
.
convGrad3D
(
V
=
inputs
.
dimshuffle
(
0
,
2
,
3
,
4
,
1
),
dCdH
=
dCdH
.
dimshuffle
(
0
,
2
,
3
,
4
,
1
),
WShape
=
filters_shape_s
,
d
=
subsample
)
desc
=
dnn
.
GpuDnnConv3dDesc
(
border_mode
=
'valid'
,
subsample
=
subsample
,
conv_mode
=
'cross'
)(
inputs
.
shape
,
kern
.
shape
)
gradW
=
dnn
.
GpuDnnConv3dGradW
()(
inputs
,
dCdH
,
kern
,
desc
)
f_ref
=
theano
.
function
([],
conv
.
dimshuffle
(
0
,
4
,
1
,
2
,
3
))
f
=
theano
.
function
([],
gradW
,
mode
=
mode_with_gpu
)
res_ref
=
f_ref
()
res
=
f
()
utt
.
assert_allclose
(
res_ref
,
res
)
run_gradweight
(
inputs_shape
=
(
16
,
1
,
10
,
12
,
16
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
dCdH_shape
=
(
16
,
10
,
5
,
1
,
13
),
subsample
=
(
1
,
1
,
1
))
run_gradweight
(
inputs_shape
=
(
16
,
1
,
20
,
10
,
16
),
filters_shape
=
(
10
,
1
,
6
,
4
,
4
),
dCdH_shape
=
(
16
,
10
,
8
,
4
,
7
),
subsample
=
(
2
,
2
,
2
))
run_gradweight
(
inputs_shape
=
(
16
,
1
,
20
,
10
,
16
),
filters_shape
=
(
10
,
1
,
6
,
3
,
4
),
dCdH_shape
=
(
16
,
10
,
5
,
3
,
5
),
subsample
=
(
3
,
3
,
3
))
run_gradweight
(
inputs_shape
=
(
16
,
1
,
20
,
12
,
16
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
dCdH_shape
=
(
16
,
10
,
8
,
1
,
5
),
subsample
=
(
2
,
1
,
3
))
def
test_conv3d_gradinput
():
if
not
cuda
.
dnn
.
dnn_available
()
and
dnn
.
version
()[
0
]
>=
3000
:
raise
SkipTest
(
'"3D conv not supported in cudnn v1'
)
def
run_gradinput
(
inputs_shape
,
filters_shape
,
subsample
=
(
1
,
1
,
1
)):
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
'float32'
)
inputs
=
shared
(
inputs_val
)
filters
=
shared
(
filters_val
)
bias
=
shared
(
numpy
.
zeros
(
filters_shape
[
1
])
.
astype
(
'float32'
))
conv
=
theano
.
tensor
.
nnet
.
convTransp3D
(
W
=
filters
.
dimshuffle
(
0
,
2
,
3
,
4
,
1
),
b
=
bias
,
d
=
subsample
,
H
=
inputs
.
dimshuffle
(
0
,
2
,
3
,
4
,
1
))
f_ref
=
theano
.
function
([],
conv
.
dimshuffle
(
0
,
4
,
1
,
2
,
3
))
res_ref
=
f_ref
()
bottom_shape
=
res_ref
.
shape
bottom_val
=
numpy
.
random
.
random
(
bottom_shape
)
.
astype
(
'float32'
)
bottom
=
shared
(
bottom_val
)
desc
=
dnn
.
GpuDnnConv3dDesc
(
border_mode
=
'valid'
,
subsample
=
subsample
,
conv_mode
=
'cross'
)(
bottom
.
shape
,
filters
.
shape
)
gradI
=
dnn
.
GpuDnnConv3dGradI
()(
filters
,
inputs
,
bottom
,
desc
)
f
=
theano
.
function
([],
gradI
,
mode
=
mode_with_gpu
)
res
=
f
()
utt
.
assert_allclose
(
res_ref
,
res
)
run_gradinput
(
inputs_shape
=
(
16
,
10
,
15
,
12
,
12
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
))
run_gradinput
(
inputs_shape
=
(
16
,
10
,
15
,
12
,
12
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
subsample
=
(
2
,
2
,
2
))
run_gradinput
(
inputs_shape
=
(
16
,
10
,
15
,
12
,
12
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
subsample
=
(
3
,
3
,
3
))
run_gradinput
(
inputs_shape
=
(
16
,
10
,
15
,
12
,
12
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
subsample
=
(
3
,
1
,
2
))
def
test_version
():
def
test_version
():
if
not
cuda
.
dnn
.
dnn_available
():
if
not
cuda
.
dnn
.
dnn_available
():
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论