Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
6117f98b
提交
6117f98b
authored
6月 18, 2015
作者:
Nicolas Ballas
提交者:
--global
8月 04, 2015
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add cudnnv3 conv3d
上级
34223240
显示空白字符变更
内嵌
并排
正在显示
7 个修改的文件
包含
737 行增加
和
40 行删除
+737
-40
dnn.py
theano/sandbox/cuda/dnn.py
+575
-15
dnn_base.c
theano/sandbox/cuda/dnn_base.c
+54
-0
dnn_conv_base.c
theano/sandbox/cuda/dnn_conv_base.c
+6
-6
dnn_fwd.c
theano/sandbox/cuda/dnn_fwd.c
+24
-9
dnn_gi.c
theano/sandbox/cuda/dnn_gi.c
+13
-5
dnn_gw.c
theano/sandbox/cuda/dnn_gw.c
+12
-5
test_dnn.py
theano/sandbox/cuda/tests/test_dnn.py
+53
-0
没有找到文件。
theano/sandbox/cuda/dnn.py
浏览文件 @
6117f98b
...
@@ -127,6 +127,30 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) {
...
@@ -127,6 +127,30 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) {
"""
%
dict
(
var
=
var
,
err
=
err
,
desc
=
desc
,
fail
=
fail
)
"""
%
dict
(
var
=
var
,
err
=
err
,
desc
=
desc
,
fail
=
fail
)
def c_set_tensorNd(var, nb_dim, desc, err, fail):
    """Return C code filling cuDNN tensor descriptor `desc` from a CudaNdarray.

    :param var: C name of the CudaNdarray whose dims/strides are used
    :param nb_dim: number of dimensions of the tensor (Python int)
    :param desc: C name of the cudnnTensorDescriptor_t to set
    :param err: C name of a cudnnStatus_t variable receiving the status
    :param fail: the COp failure snippet to emit on error

    BUG FIX versus the original template:
      - removed a stray `);` after the call and the unused `int* stride`
        local,
      - added the missing comma between the format string and
        `cudnnGetErrorString(...)` in PyErr_Format,
      - escaped `%%d` so Python %-substitution does not choke, and
        substituted `nb_dim` explicitly instead of leaving a bare C
        identifier,
      - used cudnnSetTensorNdDescriptor (the documented Nd setter taking
        explicit strides).

    NOTE(review): broadcastable dimensions have stride 0, which cuDNN
    rejects; the C-side helper in dnn_base.c substitutes contiguous
    strides for those — confirm whether the same treatment is needed here.
    """
    return """
{
  %(err)s = cudnnSetTensorNdDescriptor(
    %(desc)s, CUDNN_DATA_FLOAT, %(nb_dim)d,
    CudaNdarray_HOST_DIMS(%(var)s),
    CudaNdarray_HOST_STRIDES(%(var)s));
  if (%(err)s != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError,
                 "could not set tensorNd descriptor: %%s, nb_dim=%%d",
                 cudnnGetErrorString(%(err)s), %(nb_dim)d);
    %(fail)s
  }
}
""" % dict(var=var, nb_dim=nb_dim, err=err, desc=desc, fail=fail)
class
DnnBase
(
GpuOp
,
COp
):
class
DnnBase
(
GpuOp
,
COp
):
"""
"""
...
@@ -359,10 +383,146 @@ AddConfigVar('dnn.conv.workmem_bwd',
...
@@ -359,10 +383,146 @@ AddConfigVar('dnn.conv.workmem_bwd',
EnumStr
(
'deterministic'
,
'none'
,
'fft'
,
'guess'
),
EnumStr
(
'deterministic'
,
'none'
,
'fft'
,
'guess'
),
in_c_key
=
False
)
in_c_key
=
False
)
class GpuDnnConv3dDesc(GpuOp):
    """This Op builds a 3d convolution descriptor for use in the other
    3d convolution operations.

    see the doc of :func:`dnn_conv` for a description of the parameters
    """
    __props__ = ('border_mode', 'subsample', 'conv_mode')

    def c_headers(self):
        return ['cudnn.h', 'cudnn_helper.h']

    def c_header_dirs(self):
        return [os.path.dirname(__file__)]

    def c_libraries(self):
        return ['cudnn']

    def c_compiler(self):
        return NVCC_compiler

    def __init__(self, border_mode, subsample=(1, 1, 1), conv_mode='conv'):
        # BUG FIX: the default subsample was (1, 1), which always failed
        # the `len(subsample) == 3` assertion below for a 3d descriptor.
        if isinstance(border_mode, int):
            # A single int pads all three spatial dimensions equally.
            border_mode = (border_mode, border_mode, border_mode)
        if isinstance(border_mode, tuple):
            pad_d, pad_h, pad_w = map(int, border_mode)
            border_mode = (pad_d, pad_h, pad_w)
        if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or
                border_mode in ('valid', 'full')):
            # BUG FIX: message said "a pair of integers"; this 3d op takes
            # three integers.
            raise ValueError('invalid border_mode {}, which must be either '
                             '"valid", "full", an integer or three'
                             ' integers'.format(border_mode))
        self.border_mode = border_mode
        assert len(subsample) == 3
        self.subsample = subsample
        assert conv_mode in ('conv', 'cross')
        self.conv_mode = conv_mode

    def make_node(self, img_shape, kern_shape):
        # Both inputs are 1D int64 shape vectors (as produced by .shape).
        if img_shape.type.ndim != 1 or img_shape.type.dtype != 'int64':
            raise TypeError('img must be 1D shape tensor')
        if kern_shape.type.ndim != 1 or kern_shape.type.dtype != 'int64':
            raise TypeError('kern must be 1D shape tensor')
        return Apply(self, [img_shape, kern_shape],
                     [CDataType("cudnnConvolutionDescriptor_t")()])

    def c_code(self, node, name, inputs, outputs, sub):
        img_shape, kern_shape = inputs
        desc, = outputs

        if isinstance(self.border_mode, tuple):
            pad_d_spec, pad_h_spec, pad_w_spec = map(int, self.border_mode)
            # BUG FIX: this was `assert pad_d_spec, pad_h_spec >= 0 and
            # pad_w_spec >= 0`, which only tested pad_d_spec's truthiness
            # (the rest was parsed as the assert *message*).
            assert pad_d_spec >= 0 and pad_h_spec >= 0 and pad_w_spec >= 0
            bmode = 2
        else:
            pad_d_spec = pad_h_spec = pad_w_spec = 0
            if self.border_mode == "valid":
                bmode = 1
            else:
                assert self.border_mode == "full"
                bmode = 0

        if self.conv_mode == 'conv':
            conv_flag = 'CUDNN_CONVOLUTION'
        else:
            conv_flag = 'CUDNN_CROSS_CORRELATION'

        return """
{
  cudnnStatus_t err;
  int pad[3];

  if ((err = cudnnCreateConvolutionDescriptor(&%(desc)s)) != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_MemoryError, "could not allocate convolution "
                 "descriptor: %%s", cudnnGetErrorString(err));
    %(fail)s
  }

  if (%(bmode)d == 2) {
    /* explicit padding */
    pad[0] = %(pad_d_spec)d;
    pad[1] = %(pad_h_spec)d;
    pad[2] = %(pad_w_spec)d;
  } else if (%(bmode)d == 1) {
    /* valid: no padding */
    pad[0] = 0;
    pad[1] = 0;
    pad[2] = 0;
  } else if (%(bmode)d == 0) {
    /* full: pad each spatial dim with (kernel size - 1) */
    pad[0] = *(npy_int64 *)PyArray_GETPTR1(%(kern_shape)s, 2) - 1;
    pad[1] = *(npy_int64 *)PyArray_GETPTR1(%(kern_shape)s, 3) - 1;
    pad[2] = *(npy_int64 *)PyArray_GETPTR1(%(kern_shape)s, 4) - 1;
  } else {
    PyErr_SetString(PyExc_ValueError, "bad border mode");
    %(fail)s
  }
#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 30
  {
    int subsample[3];
    int upscale[3] = {1, 1, 1};
    subsample[0] = %(subsd)d;
    subsample[1] = %(subsx)d;
    subsample[2] = %(subsy)d;
    err = cudnnSetConvolutionNdDescriptor(%(desc)s,
                                          3,
                                          pad, subsample, upscale,
                                          %(conv_flag)s);
  }
#else
  PyErr_Format(PyExc_RuntimeError,
               "could not set op descriptor: CUDNN_VERSION must be >= 30");
  /* BUG FIX: bail out here; otherwise `err` was read uninitialized in
     the check below. */
  %(fail)s
#endif
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s",
                 cudnnGetErrorString(err));
    %(fail)s
  }
}
""" % dict(name=name, img_shape=img_shape, kern_shape=kern_shape,
           desc=desc, bmode=bmode, conv_flag=conv_flag, fail=sub['fail'],
           subsd=self.subsample[0], subsx=self.subsample[1],
           subsy=self.subsample[2], pad_d_spec=pad_d_spec,
           pad_h_spec=pad_h_spec, pad_w_spec=pad_w_spec)

    def c_code_cache_version(self):
        # Cache deliberately disabled while this op is under development.
        return None
        # return (2, version())
# Scalar constants shared by the cuDNN ops below.
# alpha/beta blending defaults for the cuDNN calls:
_zero = constant(numpy.asarray(0.0, dtype='float32'))
_one = constant(numpy.asarray(1.0, dtype='float32'))
# Tensor-rank markers: 4 for 2d convolution, 5 for 3d convolution.
_ifour = constant(numpy.asarray(4, dtype='int32'))
_ifive = constant(numpy.asarray(5, dtype='int32'))
def
ensure_float
(
val
,
default
,
name
):
def
ensure_float
(
val
,
default
,
name
):
if
val
is
None
:
if
val
is
None
:
...
@@ -377,6 +537,19 @@ def ensure_float(val, default, name):
...
@@ -377,6 +537,19 @@ def ensure_float(val, default, name):
raise
TypeError
(
"
%
s: type is not float32"
%
(
name
,))
raise
TypeError
(
"
%
s: type is not float32"
%
(
name
,))
return
val
return
val
def ensure_int(val, default, name):
    """Coerce `val` into an int32 theano scalar variable.

    Returns a clone of `default` when `val` is None; otherwise wraps
    plain values in a constant, converts 0-d tensors to scalars, and
    validates that the result is an int32 scalar.

    :param val: value to coerce, or None to use the default
    :param default: variable cloned when `val` is None
    :param name: label used in error messages
    :raises TypeError: if `val` is not an int32 scalar after coercion
    """
    # Fall back to a fresh copy of the default when nothing was given.
    if val is None:
        return default.clone()
    # Wrap raw Python values into a symbolic constant.
    if not isinstance(val, Variable):
        val = constant(val)
    # A 0-d tensor is acceptable but must become a true scalar.
    if hasattr(val, 'ndim') and val.ndim == 0:
        val = as_scalar(val)
    if not isinstance(val.type, theano.scalar.Scalar):
        raise TypeError("%s: expected a scalar value" % (name,))
    if val.type.dtype != 'int32':
        raise TypeError("%s: type is not int32" % (name,))
    return val
class
GpuDnnConv
(
DnnBase
,
COp
):
class
GpuDnnConv
(
DnnBase
,
COp
):
"""
"""
...
@@ -449,7 +622,7 @@ class GpuDnnConv(DnnBase, COp):
...
@@ -449,7 +622,7 @@ class GpuDnnConv(DnnBase, COp):
return
[
alg_def
,
alg_choose_def
,
alg_choose_time_def
]
+
inpl_def
return
[
alg_def
,
alg_choose_def
,
alg_choose_time_def
]
+
inpl_def
def
make_node
(
self
,
img
,
kern
,
output
,
desc
,
alpha
=
None
,
beta
=
None
):
def
make_node
(
self
,
img
,
kern
,
output
,
desc
,
alpha
=
None
,
beta
=
None
,
nb_dim
=
None
):
img
=
as_cuda_ndarray_variable
(
img
)
img
=
as_cuda_ndarray_variable
(
img
)
kern
=
as_cuda_ndarray_variable
(
kern
)
kern
=
as_cuda_ndarray_variable
(
kern
)
output
=
as_cuda_ndarray_variable
(
output
)
output
=
as_cuda_ndarray_variable
(
output
)
...
@@ -466,12 +639,13 @@ class GpuDnnConv(DnnBase, COp):
...
@@ -466,12 +639,13 @@ class GpuDnnConv(DnnBase, COp):
alpha
=
ensure_float
(
alpha
,
_one
,
'alpha'
)
alpha
=
ensure_float
(
alpha
,
_one
,
'alpha'
)
beta
=
ensure_float
(
beta
,
_zero
,
'beta'
)
beta
=
ensure_float
(
beta
,
_zero
,
'beta'
)
nb_dim
=
ensure_int
(
nb_dim
,
_ifour
,
'nb_dim'
)
return
Apply
(
self
,
[
img
,
kern
,
output
,
desc
,
alpha
,
beta
],
return
Apply
(
self
,
[
img
,
kern
,
output
,
desc
,
alpha
,
beta
,
nb_dim
],
[
output
.
type
()])
[
output
.
type
()])
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
img
,
kerns
,
output
,
desc
,
alpha
,
beta
=
inp
img
,
kerns
,
output
,
desc
,
alpha
,
beta
,
nb_dim
=
inp
top
,
=
grads
top
,
=
grads
top
=
gpu_contiguous
(
top
)
top
=
gpu_contiguous
(
top
)
...
@@ -480,13 +654,14 @@ class GpuDnnConv(DnnBase, COp):
...
@@ -480,13 +654,14 @@ class GpuDnnConv(DnnBase, COp):
d_kerns
=
GpuDnnConvGradW
()(
img
,
top
,
gpu_alloc_empty
(
*
kerns
.
shape
),
desc
)
d_kerns
=
GpuDnnConvGradW
()(
img
,
top
,
gpu_alloc_empty
(
*
kerns
.
shape
),
desc
)
d_alpha
=
grad_not_implemented
(
self
,
4
,
alpha
)
d_alpha
=
grad_not_implemented
(
self
,
4
,
alpha
)
d_beta
=
grad_not_implemented
(
self
,
5
,
beta
)
d_beta
=
grad_not_implemented
(
self
,
5
,
beta
)
d_nb_dim
=
grad_not_implemented
(
self
,
6
,
nb_dim
)
return
[
d_img
*
alpha
,
d_kerns
*
alpha
,
top
*
beta
,
return
[
d_img
*
alpha
,
d_kerns
*
alpha
,
top
*
beta
,
DisconnectedType
()(),
d_alpha
,
d_beta
]
DisconnectedType
()(),
d_alpha
,
d_beta
,
d_nb_dim
]
def
connection_pattern
(
self
,
node
):
def
connection_pattern
(
self
,
node
):
# not connected to desc
# not connected to desc
return
[[
1
],
[
1
],
[
1
],
[
0
],
[
1
],
[
1
]]
return
[[
1
],
[
1
],
[
1
],
[
0
],
[
1
],
[
1
]
,
[
1
]
]
@staticmethod
@staticmethod
def
get_out_shape
(
ishape
,
kshape
,
border_mode
,
subsample
):
def
get_out_shape
(
ishape
,
kshape
,
border_mode
,
subsample
):
...
@@ -523,6 +698,101 @@ class GpuDnnConv(DnnBase, COp):
...
@@ -523,6 +698,101 @@ class GpuDnnConv(DnnBase, COp):
return
[
shape
[
2
]]
return
[
shape
[
2
]]
class GpuDnnConv3d(GpuDnnConv):
    """
    The forward 3d convolution.

    :param image:
    :param kernel:
    :param descr: the convolution descriptor
    """
    __props__ = ('workmem', 'inplace')
    __input_name__ = ('image', 'kernel', 'output', 'descriptor',
                      'alpha', 'beta')

    def __init__(self, workmem=None, inplace=False):
        """
        :param workmem: either 'none', 'small', 'large', 'fft', 'time' or
        'guess'. Default is the value of :attr:`config.dnn.conv.workmem`.
        """
        # Only workmem='none' works with cudnn conv 3d, so the requested
        # value is deliberately overridden.
        super(GpuDnnConv3d, self).__init__(workmem='none', inplace=inplace)

    def make_node(self, img, kern, output, desc, alpha=None, beta=None,
                  nb_dim=None):
        img = as_cuda_ndarray_variable(img)
        kern = as_cuda_ndarray_variable(kern)
        output = as_cuda_ndarray_variable(output)
        # 3d convolution uses the bct01 layout: all operands are 5D.
        if img.type.ndim != 5:
            raise TypeError('img must be 5D tensor')
        if kern.type.ndim != 5:
            raise TypeError('kern must be 5D tensor')
        if output.type.ndim != 5:
            raise TypeError('output must be a 5D tensor')

        if (not isinstance(desc.type, CDataType)
                or desc.type.ctype != 'cudnnConvolutionDescriptor_t'):
            raise TypeError('desc must be cudnnConvolutionDescriptor_t')

        alpha = ensure_float(alpha, _one, 'alpha')
        beta = ensure_float(beta, _zero, 'beta')
        nb_dim = ensure_int(nb_dim, _ifive, 'nb_dim')

        return Apply(self, [img, kern, output, desc, alpha, beta, nb_dim],
                     [output.type()])

    def grad(self, inp, grads):
        img, kerns, output, desc, alpha, beta, nb_dim = inp
        top, = grads
        top = gpu_contiguous(top)

        d_img = GpuDnnConvGrad3dI()(kerns, top, gpu_alloc_empty(*img.shape),
                                    desc)
        # BUG FIX: this called GpuDnnConvGrad3dW, which is not defined
        # anywhere in this module; the weight-gradient class is
        # GpuDnnConv3dGradW.
        d_kerns = GpuDnnConv3dGradW()(img, top,
                                      gpu_alloc_empty(*kerns.shape), desc)
        d_alpha = grad_not_implemented(self, 4, alpha)
        d_beta = grad_not_implemented(self, 5, beta)
        d_nb_dim = grad_not_implemented(self, 6, nb_dim)

        return [d_img * alpha, d_kerns * alpha, top * beta,
                DisconnectedType()(), d_alpha, d_beta, d_nb_dim]

    @staticmethod
    def get_out_shape(ishape, kshape, border_mode, subsample):
        """
        This function computes the output shape for a convolution with
        the specified parameters. `ishape` and `kshape` can be symbolic
        or scalar.
        """
        b = ishape[0]   # Number of inputs
        d = ishape[2]   # Depth of input feature maps
        h = ishape[3]   # Height of input feature maps
        w = ishape[4]   # Width of input feature maps
        nb = kshape[0]  # Number of output feature maps
        kd = kshape[2]  # Depth of each filter
        kh = kshape[3]  # Height of each filter
        kw = kshape[4]  # Width of each filter

        sd, sh, sw = subsample

        if border_mode == 'full':
            padd = kd - 1
            padh = kh - 1
            padw = kw - 1
        elif isinstance(border_mode, tuple):
            padd, padh, padw = border_mode
        else:
            assert border_mode == 'valid'
            padd = 0
            padh = 0
            padw = 0

        return (b, nb,
                (d + 2 * padd - kd) // sd + 1,
                (h + 2 * padh - kh) // sh + 1,
                (w + 2 * padw - kw) // sw + 1)
class
GpuDnnConvGradW
(
DnnBase
,
COp
):
class
GpuDnnConvGradW
(
DnnBase
,
COp
):
"""
"""
The convolution gradient with respect to the weights.
The convolution gradient with respect to the weights.
...
@@ -554,7 +824,7 @@ class GpuDnnConvGradW(DnnBase, COp):
...
@@ -554,7 +824,7 @@ class GpuDnnConvGradW(DnnBase, COp):
self
.
inplace
=
False
self
.
inplace
=
False
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
img
,
top
,
output
,
desc
,
alpha
,
beta
=
inp
img
,
top
,
output
,
desc
,
alpha
,
beta
,
nb_dim
=
inp
kerns
,
=
grads
kerns
,
=
grads
kerns
=
gpu_contiguous
(
kerns
)
kerns
=
gpu_contiguous
(
kerns
)
...
@@ -563,13 +833,14 @@ class GpuDnnConvGradW(DnnBase, COp):
...
@@ -563,13 +833,14 @@ class GpuDnnConvGradW(DnnBase, COp):
d_top
=
GpuDnnConv
()(
img
,
kerns
,
gpu_alloc_empty
(
*
top
.
shape
),
desc
)
d_top
=
GpuDnnConv
()(
img
,
kerns
,
gpu_alloc_empty
(
*
top
.
shape
),
desc
)
d_alpha
=
grad_not_implemented
(
self
,
4
,
alpha
)
d_alpha
=
grad_not_implemented
(
self
,
4
,
alpha
)
d_beta
=
grad_not_implemented
(
self
,
5
,
beta
)
d_beta
=
grad_not_implemented
(
self
,
5
,
beta
)
d_nb_dim
=
grad_not_implemented
(
self
,
6
,
nb_dim
)
return
(
d_img
*
alpha
,
d_top
*
alpha
,
kerns
*
beta
,
return
(
d_img
*
alpha
,
d_top
*
alpha
,
kerns
*
beta
,
DisconnectedType
()(),
d_alpha
,
d_beta
)
DisconnectedType
()(),
d_alpha
,
d_beta
,
d_nb_dim
)
def
connection_pattern
(
self
,
node
):
def
connection_pattern
(
self
,
node
):
# not connected to desc
# not connected to desc
return
[[
1
],
[
1
],
[
1
],
[
0
],
[
1
],
[
1
]]
return
[[
1
],
[
1
],
[
1
],
[
0
],
[
1
],
[
1
]
,
[
1
]
]
def
get_op_params
(
self
):
def
get_op_params
(
self
):
if
self
.
inplace
:
if
self
.
inplace
:
...
@@ -597,7 +868,7 @@ class GpuDnnConvGradW(DnnBase, COp):
...
@@ -597,7 +868,7 @@ class GpuDnnConvGradW(DnnBase, COp):
return
inplace_def
+
[
alg_def
,
alg_choose_def
]
return
inplace_def
+
[
alg_def
,
alg_choose_def
]
def
make_node
(
self
,
img
,
topgrad
,
output
,
desc
,
alpha
=
None
,
beta
=
None
):
def
make_node
(
self
,
img
,
topgrad
,
output
,
desc
,
alpha
=
None
,
beta
=
None
,
nb_dim
=
None
):
img
=
as_cuda_ndarray_variable
(
img
)
img
=
as_cuda_ndarray_variable
(
img
)
topgrad
=
as_cuda_ndarray_variable
(
topgrad
)
topgrad
=
as_cuda_ndarray_variable
(
topgrad
)
output
=
as_cuda_ndarray_variable
(
output
)
output
=
as_cuda_ndarray_variable
(
output
)
...
@@ -614,13 +885,70 @@ class GpuDnnConvGradW(DnnBase, COp):
...
@@ -614,13 +885,70 @@ class GpuDnnConvGradW(DnnBase, COp):
alpha
=
ensure_float
(
alpha
,
_one
,
'alpha'
)
alpha
=
ensure_float
(
alpha
,
_one
,
'alpha'
)
beta
=
ensure_float
(
beta
,
_zero
,
'beta'
)
beta
=
ensure_float
(
beta
,
_zero
,
'beta'
)
nb_dim
=
ensure_int
(
nb_dim
,
_ifour
,
'nb_dim'
)
return
Apply
(
self
,
[
img
,
topgrad
,
output
,
desc
,
alpha
,
beta
],
return
Apply
(
self
,
[
img
,
topgrad
,
output
,
desc
,
alpha
,
beta
,
nb_dim
],
[
output
.
type
()])
[
output
.
type
()])
def
infer_shape
(
self
,
node
,
shape
):
def
infer_shape
(
self
,
node
,
shape
):
return
[
shape
[
2
]]
return
[
shape
[
2
]]
class GpuDnnConv3dGradW(GpuDnnConvGradW):
    """
    The 3d convolution gradient with respect to the weights.

    :param image:
    :param kernel:
    :param descr: the convolution descriptor
    """
    __props__ = ('workmem', 'inplace',)
    __input_name__ = ('image', 'grad', 'output', 'descriptor',
                      'alpha', 'beta')

    def __init__(self, inplace=False, workmem=None):
        # Only workmem='none' works with cudnn conv 3d.
        # BUG FIX: the original called the non-existent method `__init`
        # (missing trailing underscores), raising AttributeError as soon
        # as the op was constructed.
        super(GpuDnnConv3dGradW, self).__init__(inplace=inplace,
                                                workmem='none')

    def grad(self, inp, grads):
        img, top, output, desc, alpha, beta, nb_dim = inp
        kerns, = grads
        kerns = gpu_contiguous(kerns)

        d_img = GpuDnnConvGrad3dI()(kerns, top, gpu_alloc_empty(*img.shape),
                                    desc)
        d_top = GpuDnnConv3d()(img, kerns, gpu_alloc_empty(*top.shape), desc)
        d_alpha = grad_not_implemented(self, 4, alpha)
        d_beta = grad_not_implemented(self, 5, beta)
        d_nb_dim = grad_not_implemented(self, 6, nb_dim)

        return (d_img * alpha, d_top * alpha, kerns * beta,
                DisconnectedType()(), d_alpha, d_beta, d_nb_dim)

    def make_node(self, img, topgrad, output, desc, alpha=None, beta=None,
                  nb_dim=None):
        img = as_cuda_ndarray_variable(img)
        topgrad = as_cuda_ndarray_variable(topgrad)
        output = as_cuda_ndarray_variable(output)
        # All operands are 5D in the bct01 layout.
        if img.type.ndim != 5:
            raise TypeError('img must be 5D tensor')
        if topgrad.type.ndim != 5:
            raise TypeError('topgrad must be 5D tensor')
        if output.type.ndim != 5:
            raise TypeError('output must be 5D tensor')

        if (not isinstance(desc.type, CDataType)
                or desc.type.ctype != 'cudnnConvolutionDescriptor_t'):
            raise TypeError('desc must be cudnnConvolutionDescriptor_t')

        alpha = ensure_float(alpha, _one, 'alpha')
        beta = ensure_float(beta, _zero, 'beta')
        nb_dim = ensure_int(nb_dim, _ifive, 'nb_dim')

        return Apply(self, [img, topgrad, output, desc, alpha, beta, nb_dim],
                     [output.type()])
class
GpuDnnConvGradI
(
DnnBase
,
COp
):
class
GpuDnnConvGradI
(
DnnBase
,
COp
):
"""
"""
...
@@ -652,7 +980,7 @@ class GpuDnnConvGradI(DnnBase, COp):
...
@@ -652,7 +980,7 @@ class GpuDnnConvGradI(DnnBase, COp):
self
.
workmem
=
'none'
self
.
workmem
=
'none'
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
kerns
,
top
,
output
,
desc
,
alpha
,
beta
=
inp
kerns
,
top
,
output
,
desc
,
alpha
,
beta
,
nb_dim
=
inp
img
,
=
grads
img
,
=
grads
img
=
gpu_contiguous
(
img
)
img
=
gpu_contiguous
(
img
)
...
@@ -661,13 +989,14 @@ class GpuDnnConvGradI(DnnBase, COp):
...
@@ -661,13 +989,14 @@ class GpuDnnConvGradI(DnnBase, COp):
d_top
=
GpuDnnConv
()(
img
,
kerns
,
gpu_alloc_empty
(
*
top
.
shape
),
desc
)
d_top
=
GpuDnnConv
()(
img
,
kerns
,
gpu_alloc_empty
(
*
top
.
shape
),
desc
)
d_alpha
=
grad_not_implemented
(
self
,
4
,
alpha
)
d_alpha
=
grad_not_implemented
(
self
,
4
,
alpha
)
d_beta
=
grad_not_implemented
(
self
,
5
,
beta
)
d_beta
=
grad_not_implemented
(
self
,
5
,
beta
)
d_nb_dim
=
grad_not_implemented
(
self
,
6
,
nb_dim
)
return
(
d_kerns
*
alpha
,
d_top
*
alpha
,
img
*
beta
,
return
(
d_kerns
*
alpha
,
d_top
*
alpha
,
img
*
beta
,
DisconnectedType
()(),
d_alpha
,
d_beta
)
DisconnectedType
()(),
d_alpha
,
d_beta
,
d_nb_dim
)
def
connection_pattern
(
self
,
node
):
def
connection_pattern
(
self
,
node
):
# not connected to desc
# not connected to desc
return
[[
1
],
[
1
],
[
1
],
[
0
],
[
1
],
[
1
]]
return
[[
1
],
[
1
],
[
1
],
[
0
],
[
1
],
[
1
]
,
[
1
]
]
def
get_op_params
(
self
):
def
get_op_params
(
self
):
if
self
.
inplace
:
if
self
.
inplace
:
...
@@ -695,7 +1024,7 @@ class GpuDnnConvGradI(DnnBase, COp):
...
@@ -695,7 +1024,7 @@ class GpuDnnConvGradI(DnnBase, COp):
return
inplace_def
+
[
alg_def
,
alg_choose_def
]
return
inplace_def
+
[
alg_def
,
alg_choose_def
]
def
make_node
(
self
,
kern
,
topgrad
,
output
,
desc
,
alpha
=
None
,
beta
=
None
):
def
make_node
(
self
,
kern
,
topgrad
,
output
,
desc
,
alpha
=
None
,
beta
=
None
,
nb_dim
=
None
):
kern
=
as_cuda_ndarray_variable
(
kern
)
kern
=
as_cuda_ndarray_variable
(
kern
)
topgrad
=
as_cuda_ndarray_variable
(
topgrad
)
topgrad
=
as_cuda_ndarray_variable
(
topgrad
)
output
=
as_cuda_ndarray_variable
(
output
)
output
=
as_cuda_ndarray_variable
(
output
)
...
@@ -712,14 +1041,72 @@ class GpuDnnConvGradI(DnnBase, COp):
...
@@ -712,14 +1041,72 @@ class GpuDnnConvGradI(DnnBase, COp):
alpha
=
ensure_float
(
alpha
,
_one
,
'alpha'
)
alpha
=
ensure_float
(
alpha
,
_one
,
'alpha'
)
beta
=
ensure_float
(
beta
,
_zero
,
'beta'
)
beta
=
ensure_float
(
beta
,
_zero
,
'beta'
)
nb_dim
=
ensure_int
(
nb_dim
,
_ifour
,
'nb_dim'
)
return
Apply
(
self
,
[
kern
,
topgrad
,
output
,
desc
,
alpha
,
beta
],
return
Apply
(
self
,
[
kern
,
topgrad
,
output
,
desc
,
alpha
,
beta
,
nb_dim
],
[
output
.
type
()])
[
output
.
type
()])
def
infer_shape
(
self
,
node
,
shape
):
def
infer_shape
(
self
,
node
,
shape
):
return
[
shape
[
2
]]
return
[
shape
[
2
]]
class GpuDnnConvGrad3dI(GpuDnnConvGradI):
    """
    The 3d convolution gradient with respect to the inputs.

    :param image:
    :param kernel:
    :param descr: the convolution descriptor
    """
    __props__ = ('inplace',)
    __input_name__ = ('kernel', 'grad', 'output', 'descriptor',
                      'alpha', 'beta')

    def __init__(self, inplace=False):
        # BUG FIX: super() must name THIS class; the original wrote
        # super(GpuDnnConvGradI, self), which skips GpuDnnConvGradI's own
        # __init__ and dispatches straight to the grandparent.
        super(GpuDnnConvGrad3dI, self).__init__(inplace)

    def grad(self, inp, grads):
        kerns, top, output, desc, alpha, beta, nb_dim = inp
        img, = grads
        img = gpu_contiguous(img)

        # BUG FIX: the weight-gradient class is GpuDnnConv3dGradW; the
        # original referenced the undefined name GpuDnnConvGrad3dW.
        d_kerns = GpuDnnConv3dGradW()(img, top,
                                      gpu_alloc_empty(*kerns.shape), desc)
        d_top = GpuDnnConv3d()(img, kerns, gpu_alloc_empty(*top.shape), desc)
        d_alpha = grad_not_implemented(self, 4, alpha)
        d_beta = grad_not_implemented(self, 5, beta)
        d_nb_dim = grad_not_implemented(self, 6, nb_dim)

        return (d_kerns * alpha, d_top * alpha, img * beta,
                DisconnectedType()(), d_alpha, d_beta, d_nb_dim)

    def make_node(self, kern, topgrad, output, desc, alpha=None, beta=None,
                  nb_dim=None):
        kern = as_cuda_ndarray_variable(kern)
        topgrad = as_cuda_ndarray_variable(topgrad)
        output = as_cuda_ndarray_variable(output)
        if kern.type.ndim != 5:
            raise TypeError('kern must be 5D tensor')
        if topgrad.type.ndim != 5:
            raise TypeError('topgrad must be 5D tensor')
        # BUG FIX: the check compared against 4 while the error message
        # (and the 3d bct01 layout used throughout) requires 5D.
        if output.type.ndim != 5:
            raise TypeError('output must be 5D tensor')

        if (not isinstance(desc.type, CDataType)
                or desc.type.ctype != 'cudnnConvolutionDescriptor_t'):
            raise TypeError('desc must be cudnnConvolutionDescriptor_t')

        alpha = ensure_float(alpha, _one, 'alpha')
        beta = ensure_float(beta, _zero, 'beta')
        nb_dim = ensure_int(nb_dim, _ifive, 'nb_dim')

        return Apply(self, [kern, topgrad, output, desc, alpha, beta, nb_dim],
                     [output.type()])
def
dnn_conv
(
img
,
kerns
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
def
dnn_conv
(
img
,
kerns
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
conv_mode
=
'conv'
,
direction_hint
=
None
,
workmem
=
None
):
conv_mode
=
'conv'
,
direction_hint
=
None
,
workmem
=
None
):
"""
"""
...
@@ -804,6 +1191,179 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -804,6 +1191,179 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
return
GpuDnnConv
(
workmem
=
workmem
)(
img
,
kerns
,
out
,
desc
)
return
GpuDnnConv
(
workmem
=
workmem
)(
img
,
kerns
,
out
,
desc
)
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
             conv_mode='conv', direction_hint=None, workmem=None):
    """
    GPU convolution using cuDNN from NVIDIA.

    The memory layout to use is 'bc01', that is 'batch', 'channel',
    'first dim', 'second dim' in that order.

    :param img: images to do the convolution over
    :param kerns: convolution filters
    :param border_mode: one of 'valid', 'full'; additionally, the padding
        size could be directly specified by an integer or a pair of integers
    :param subsample: perform subsampling of the output (default: (1, 1))
    :param conv_mode: perform convolution (kernels flipped) or
        cross-correlation. One of 'conv', 'cross'. (default: 'conv')
    :param direction_hint: Used by graph optimizers to change algorithm
        choice. By default, GpuDnnConv will be used to carry out the
        convolution. If border_mode is 'valid', subsample is (1,1) and
        direction_hint is 'bprop weights', it will use GpuDnnConvGradW.
        If border_mode is 'full', subsample is (1,1) and direction_hint is
        *not* 'forward!', it will use GpuDnnConvGradI. This parameter is
        used internally by graph optimizers and may be removed at any time
        without a deprecation period. You have been warned.
    :param workmem: Specify the amount of working memory allowed.
        More memory is usually faster. One of 'none', 'small' or
        'large'. (default is None which takes its value from
        :attr:`config.dnn.conv.workmem`)

    :warning: The cuDNN library only works with GPU that have a compute
        capability of 3.0 or higer. This means that older GPU will not
        work with this Op.
    """
    fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)

    if (border_mode == 'valid' and subsample == (1, 1)
            and direction_hint == 'bprop weights'):
        # Special case: compute via GpuDnnConvGradW by building a 'fake'
        # convolution whose weight gradient equals the requested output.
        img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
        if conv_mode == 'conv':
            # Flip manually: these 'kerns' are not the kernels that
            # conv_mode='conv' would flip inside GpuDnnConvGradW.
            kerns = kerns[:, :, ::-1, ::-1]
        kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
        out_r = shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1
        out_c = shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1
        out = gpu_alloc_empty(shape_i(kerns, 1, fgraph),
                              shape_i(img, 1, fgraph), out_r, out_c)
        desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
                              conv_mode='cross')(img.shape, out.shape)
        conv = GpuDnnConvGradW()(img, kerns, out, desc)
        return as_cuda_ndarray_variable(conv.dimshuffle(1, 0, 2, 3))

    if (border_mode == 'full' and subsample == (1, 1)
            and direction_hint != 'forward!'):
        # Special case: a full convolution is the backward (input
        # gradient) pass of a suitable 'fake' valid convolution, which
        # GpuDnnConvGradI performs faster.
        img = gpu_contiguous(img)  # cudnn v1 and v2 rc3 need contiguous data
        kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
        if conv_mode == 'conv':
            conv_mode = 'cross'
        else:
            conv_mode = 'conv'
        out_r = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
        out_c = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
        out = gpu_alloc_empty(shape_i(img, 0, fgraph),
                              shape_i(kerns, 1, fgraph), out_r, out_c)
        desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
                              conv_mode=conv_mode)(out.shape, kerns.shape)
        return GpuDnnConvGradI()(kerns, img, out, desc)

    # Standard case: GpuDnnConv with suitable padding.
    # gpu_contiguous copies img/kerns when they have negative strides.
    img = gpu_contiguous(img)
    kerns = gpu_contiguous(kerns)
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)(img.shape, kerns.shape)
    desc_op = desc.owner.op
    out_shp = GpuDnnConv.get_out_shape(img.shape, kerns.shape,
                                       desc_op.border_mode,
                                       desc_op.subsample)
    out = gpu_alloc_empty(*out_shp)
    return GpuDnnConv(workmem=workmem)(img, kerns, out, desc)
def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
               conv_mode='conv', direction_hint=None, workmem=None):
    """
    GPU 3d convolution using cuDNN from NVIDIA.

    The memory layout to use is 'bct01', that is 'batch', 'channel',
    'first dim', 'second dim', 'third dim' in that order.

    :param img: images to do the convolution over
    :param kerns: convolution filters
    :param border_mode: one of 'valid', 'full'; additionally, the padding
        size could be directly specified by an integer or three integers
    :param subsample: perform subsampling of the output (default: (1, 1, 1))
    :param conv_mode: perform convolution (kernels flipped) or
        cross-correlation. One of 'conv', 'cross'. (default: 'conv')
    :param direction_hint: Used by graph optimizers to change algorithm
        choice. By default, GpuDnnConv3d will be used to carry out the
        convolution. If border_mode is 'valid', subsample is (1,1,1) and
        direction_hint is 'bprop weights', it will use GpuDnnConv3dGradW.
        If border_mode is 'full', subsample is (1,1,1) and direction_hint
        is *not* 'forward!', it will use GpuDnnConvGrad3dI. This parameter
        is used internally by graph optimizers and may be removed at any
        time without a deprecation period. You have been warned.
    :param workmem: Specify the amount of working memory allowed.
        More memory is usually faster. One of 'none', 'small' or
        'large'. (default is None which takes its value from
        :attr:`config.dnn.conv.workmem`)

    :warning: The cuDNN library only works with GPU that have a compute
        capability of 3.0 or higer. This means that older GPU will not
        work with this Op.
    :warning: dnn_conv3d only works with cuDNN library 3.0
    """
    fgraph = getattr(img, 'fgraph', None) or getattr(kerns, 'fgraph', None)
    if (border_mode == 'valid' and subsample == (1, 1, 1) and
            direction_hint == 'bprop weights'):
        # Special case: We are asked to use GpuDnnConv3dGradW. We need to
        # set up a suitable 'fake' convolution to compute the gradient for.
        img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3, 4))
        if conv_mode == 'conv':
            # We need to flip manually. These 'kerns' are not the kernels
            # that would be flipped by conv_mode='conv' in GpuDnnConv3dGradW.
            kerns = kerns[:, :, ::-1, ::-1, ::-1]
        kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3, 4))
        shape2 = shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1
        shape3 = shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1
        shape4 = shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1
        # BUG FIX: the last dimension passed shape3 twice; shape4 was
        # computed above but never used, producing a wrongly-sized output.
        out = gpu_alloc_empty(shape_i(kerns, 1, fgraph),
                              shape_i(img, 1, fgraph),
                              shape2, shape3, shape4)
        desc = GpuDnnConv3dDesc(border_mode='valid', subsample=(1, 1, 1),
                                conv_mode='cross')(img.shape, out.shape)
        conv = GpuDnnConv3dGradW()(img, kerns, out, desc)
        return as_cuda_ndarray_variable(conv.dimshuffle(1, 0, 2, 3, 4))

    elif (border_mode == 'full' and subsample == (1, 1, 1) and
          direction_hint != 'forward!'):
        # Special case: We can be faster by using GpuDnnConvGrad3dI to
        # compute the full convolution as the backward pass of a valid
        # convolution. We just need a suitable 'fake' valid convolution.
        img = gpu_contiguous(img)  # cudnn v1 and v2 rc3 need contiguous data
        kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3, 4))
        conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
        shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1
        shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
        shape4 = shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1
        out = gpu_alloc_empty(shape_i(img, 0, fgraph),
                              shape_i(kerns, 1, fgraph),
                              shape2, shape3, shape4)
        desc = GpuDnnConv3dDesc(border_mode='valid', subsample=(1, 1, 1),
                                conv_mode=conv_mode)(out.shape, kerns.shape)
        # BUG FIX: the input-gradient class defined in this module is
        # GpuDnnConvGrad3dI; GpuDnnConv3dGradI does not exist.
        return GpuDnnConvGrad3dI()(kerns, img, out, desc)

    # Standard case: We use GpuDnnConv3d with suitable padding.
    # gpu_contiguous will return a copy if img contains negative strides.
    img = gpu_contiguous(img)
    kerns = gpu_contiguous(kerns)
    desc = GpuDnnConv3dDesc(border_mode=border_mode, subsample=subsample,
                            conv_mode=conv_mode)(img.shape, kerns.shape)
    desc_op = desc.owner.op
    out_shp = GpuDnnConv3d.get_out_shape(img.shape, kerns.shape,
                                         desc_op.border_mode,
                                         desc_op.subsample)
    out = gpu_alloc_empty(*out_shp)
    return GpuDnnConv3d(workmem=workmem)(img, kerns, out, desc)
class
GpuDnnPoolDesc
(
GpuOp
):
class
GpuDnnPoolDesc
(
GpuOp
):
"""
"""
This Op builds a pooling descriptor for use in the other
This Op builds a pooling descriptor for use in the other
...
...
theano/sandbox/cuda/dnn_base.c
浏览文件 @
6117f98b
...
@@ -33,6 +33,60 @@ c_set_tensor4d(CudaNdarray *var, cudnnTensorDescriptor_t desc) {
...
@@ -33,6 +33,60 @@ c_set_tensor4d(CudaNdarray *var, cudnnTensorDescriptor_t desc) {
return
0
;
return
0
;
}
}
static
int
c_set_tensorNd
(
CudaNdarray
*
var
,
int
dim
,
cudnnTensorDescriptor_t
desc
)
{
int
strides
[
dim
];
for
(
int
i
=
0
;
i
<
dim
;
++
i
)
{
if
(
CudaNdarray_HOST_STRIDES
(
var
)[
i
])
strides
[
i
]
=
CudaNdarray_HOST_STRIDES
(
var
)[
i
];
else
{
strides
[
i
]
=
1
;
for
(
int
j
=
i
+
1
;
j
<
dim
;
++
j
)
strides
[
i
]
*=
CudaNdarray_HOST_DIMS
(
var
)[
j
];
}
}
cudnnStatus_t
err
=
cudnnSetTensorNdDescriptor
(
desc
,
CUDNN_DATA_FLOAT
,
dim
,
CudaNdarray_HOST_DIMS
(
var
),
strides
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Could not set tensorNd descriptor: %s"
"dim=%d"
,
cudnnGetErrorString
(
err
),
dim
);
return
-
1
;
}
return
0
;
}
static
int
c_set_filterNd
(
CudaNdarray
*
var
,
int
dim
,
cudnnFilterDescriptor_t
desc
)
{
if
(
!
CudaNdarray_is_c_contiguous
(
var
))
{
PyErr_SetString
(
PyExc_ValueError
,
"Only contiguous filters (kernels) are supported."
);
return
-
1
;
}
cudnnStatus_t
err
=
cudnnSetFilterNdDescriptor
(
desc
,
CUDNN_DATA_FLOAT
,
dim
,
CudaNdarray_HOST_DIMS
(
var
));
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"Could not set filter descriptor: %s."
" dims= %d"
,
cudnnGetErrorString
(
err
),
dim
);
return
-
1
;
}
return
0
;
}
static
int
static
int
c_set_filter
(
CudaNdarray
*
var
,
cudnnFilterDescriptor_t
desc
)
{
c_set_filter
(
CudaNdarray
*
var
,
cudnnFilterDescriptor_t
desc
)
{
if
(
!
CudaNdarray_is_c_contiguous
(
var
))
{
if
(
!
CudaNdarray_is_c_contiguous
(
var
))
{
...
...
theano/sandbox/cuda/dnn_conv_base.c
浏览文件 @
6117f98b
...
@@ -7,9 +7,9 @@ cudnnFilterDescriptor_t APPLY_SPECIFIC(kerns);
...
@@ -7,9 +7,9 @@ cudnnFilterDescriptor_t APPLY_SPECIFIC(kerns);
and the algorithms, if any, that were selected according to these dimensions
and the algorithms, if any, that were selected according to these dimensions
and according to the amount of memory available at that time.
and according to the amount of memory available at that time.
*/
*/
int
APPLY_SPECIFIC
(
previous_input_shape
)[
4
];
int
APPLY_SPECIFIC
(
previous_input_shape
)[
5
];
int
APPLY_SPECIFIC
(
previous_kerns_shape
)[
4
];
int
APPLY_SPECIFIC
(
previous_kerns_shape
)[
5
];
int
APPLY_SPECIFIC
(
previous_output_shape
)[
4
];
int
APPLY_SPECIFIC
(
previous_output_shape
)[
5
];
cudnnConvolutionFwdAlgo_t
APPLY_SPECIFIC
(
previous_algo
);
cudnnConvolutionFwdAlgo_t
APPLY_SPECIFIC
(
previous_algo
);
cudnnConvolutionBwdFilterAlgo_t
APPLY_SPECIFIC
(
previous_bwd_f_algo
);
cudnnConvolutionBwdFilterAlgo_t
APPLY_SPECIFIC
(
previous_bwd_f_algo
);
cudnnConvolutionBwdDataAlgo_t
APPLY_SPECIFIC
(
previous_bwd_d_algo
);
cudnnConvolutionBwdDataAlgo_t
APPLY_SPECIFIC
(
previous_bwd_d_algo
);
...
@@ -21,12 +21,12 @@ APPLY_SPECIFIC(input) = NULL;
...
@@ -21,12 +21,12 @@ APPLY_SPECIFIC(input) = NULL;
APPLY_SPECIFIC
(
output
)
=
NULL
;
APPLY_SPECIFIC
(
output
)
=
NULL
;
APPLY_SPECIFIC
(
kerns
)
=
NULL
;
APPLY_SPECIFIC
(
kerns
)
=
NULL
;
if
((
APPLY_SPECIFIC
(
err
)
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
input
)))
!=
CUDNN_STATUS_SUCCESS
)
{
if
((
APPLY_SPECIFIC
(
err
)
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
input
)))
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"could not allocate tensor
4d
descriptor "
PyErr_Format
(
PyExc_MemoryError
,
"could not allocate tensor descriptor "
"(inp): %s"
,
cudnnGetErrorString
(
APPLY_SPECIFIC
(
err
)));
"(inp): %s"
,
cudnnGetErrorString
(
APPLY_SPECIFIC
(
err
)));
FAIL
;
FAIL
;
}
}
if
((
APPLY_SPECIFIC
(
err
)
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
output
)))
!=
CUDNN_STATUS_SUCCESS
)
{
if
((
APPLY_SPECIFIC
(
err
)
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
output
)))
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"could not allocate tensor
4d
descriptor "
PyErr_Format
(
PyExc_MemoryError
,
"could not allocate tensor descriptor "
"(out): %s"
,
cudnnGetErrorString
(
APPLY_SPECIFIC
(
err
)));
"(out): %s"
,
cudnnGetErrorString
(
APPLY_SPECIFIC
(
err
)));
FAIL
;
FAIL
;
}
}
...
@@ -36,7 +36,7 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateFilterDescriptor(&APPLY_SPECIFIC(kerns)))
...
@@ -36,7 +36,7 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateFilterDescriptor(&APPLY_SPECIFIC(kerns)))
FAIL
;
FAIL
;
}
}
for
(
int
i
=
0
;
i
<
4
;
i
++
)
for
(
int
i
=
0
;
i
<
5
;
i
++
)
{
{
APPLY_SPECIFIC
(
previous_input_shape
)[
i
]
=
0
;
APPLY_SPECIFIC
(
previous_input_shape
)[
i
]
=
0
;
APPLY_SPECIFIC
(
previous_kerns_shape
)[
i
]
=
0
;
APPLY_SPECIFIC
(
previous_kerns_shape
)[
i
]
=
0
;
...
...
theano/sandbox/cuda/dnn_fwd.c
浏览文件 @
6117f98b
...
@@ -3,7 +3,8 @@
...
@@ -3,7 +3,8 @@
int
int
APPLY_SPECIFIC
(
conv_fwd
)(
CudaNdarray
*
input
,
CudaNdarray
*
kerns
,
APPLY_SPECIFIC
(
conv_fwd
)(
CudaNdarray
*
input
,
CudaNdarray
*
kerns
,
CudaNdarray
*
om
,
cudnnConvolutionDescriptor_t
desc
,
CudaNdarray
*
om
,
cudnnConvolutionDescriptor_t
desc
,
float
alpha
,
float
beta
,
CudaNdarray
**
output
)
{
float
alpha
,
float
beta
,
int
nb_dim
,
CudaNdarray
**
output
)
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
CudaNdarray_HOST_DIMS
(
input
)[
1
]
!=
CudaNdarray_HOST_DIMS
(
kerns
)[
1
])
{
if
(
CudaNdarray_HOST_DIMS
(
input
)[
1
]
!=
CudaNdarray_HOST_DIMS
(
kerns
)[
1
])
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
...
@@ -11,37 +12,49 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -11,37 +12,49 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
return
1
;
return
1
;
}
}
if
(
c_set_tensor
4d
(
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
if
(
c_set_tensor
Nd
(
input
,
nb_dim
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
Nd
(
kerns
,
nb_dim
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
return
1
;
/* if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1) */
/* return 1; */
/* if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) */
/* return 1; */
#ifdef CONV_INPLACE
#ifdef CONV_INPLACE
Py_XDECREF
(
*
output
);
Py_XDECREF
(
*
output
);
*
output
=
om
;
*
output
=
om
;
Py_INCREF
(
*
output
);
Py_INCREF
(
*
output
);
#else
#else
if
(
CudaNdarray_prep_output
(
output
,
4
,
CudaNdarray_HOST_DIMS
(
om
))
!=
0
)
if
(
CudaNdarray_prep_output
(
output
,
nb_dim
,
CudaNdarray_HOST_DIMS
(
om
))
!=
0
)
return
1
;
return
1
;
if
(
beta
!=
0
.
0
&&
CudaNdarray_CopyFromCudaNdarray
(
*
output
,
om
))
if
(
beta
!=
0
.
0
&&
CudaNdarray_CopyFromCudaNdarray
(
*
output
,
om
))
return
1
;
return
1
;
#endif
#endif
if
(
c_set_tensor4d
(
*
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
if
(
c_set_tensorNd
(
*
output
,
nb_dim
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
return
1
;
/* if (c_set_tensor4d(*output, APPLY_SPECIFIC(output)) == -1) */
/* return 1; */
{
{
size_t
worksize
;
size_t
worksize
;
void
*
workspace
;
void
*
workspace
;
cudnnConvolutionFwdAlgo_t
chosen_algo
;
cudnnConvolutionFwdAlgo_t
chosen_algo
;
for
(
int
i
=
0
;
(
i
<
nb_dim
);
i
++
)
std
::
cout
<<
i
<<
"/"
<<
nb_dim
<<
", "
<<
CudaNdarray_HOST_DIMS
(
input
)[
i
]
<<
", "
<<
CudaNdarray_HOST_DIMS
(
kerns
)[
i
]
<<
std
::
endl
;
if
(
CHOOSE_ALGO
)
if
(
CHOOSE_ALGO
)
{
{
// Check if the input and the kernels have the same shape as they have
// Check if the input and the kernels have the same shape as they have
// last time the apply node was executed
// last time the apply node was executed
bool
same_shapes
=
true
;
bool
same_shapes
=
true
;
for
(
int
i
=
0
;
(
i
<
4
)
&&
same_shapes
;
i
++
)
for
(
int
i
=
0
;
(
i
<
nb_dim
)
&&
same_shapes
;
i
++
)
{
{
same_shapes
&=
(
CudaNdarray_HOST_DIMS
(
input
)[
i
]
!=
same_shapes
&=
(
CudaNdarray_HOST_DIMS
(
input
)[
i
]
!=
APPLY_SPECIFIC
(
previous_input_shape
)[
i
]);
APPLY_SPECIFIC
(
previous_input_shape
)[
i
]);
...
@@ -115,7 +128,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -115,7 +128,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
// Store the shapes of the inputs and kernels as well as the chosen
// Store the shapes of the inputs and kernels as well as the chosen
// algorithm for future use.
// algorithm for future use.
APPLY_SPECIFIC
(
previous_algo
)
=
chosen_algo
;
APPLY_SPECIFIC
(
previous_algo
)
=
chosen_algo
;
for
(
int
i
=
0
;
i
<
4
;
i
++
)
for
(
int
i
=
0
;
i
<
nb_dim
;
i
++
)
{
{
APPLY_SPECIFIC
(
previous_input_shape
)[
i
]
=
APPLY_SPECIFIC
(
previous_input_shape
)[
i
]
=
CudaNdarray_HOST_DIMS
(
input
)[
i
];
CudaNdarray_HOST_DIMS
(
input
)[
i
];
...
@@ -142,7 +155,8 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -142,7 +155,8 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
// If the chosen implementation is FFT, validate that it can be used
// If the chosen implementation is FFT, validate that it can be used
// on the current data and default on a safe implementation if it
// on the current data and default on a safe implementation if it
// can't.
// can't.
if
(
chosen_algo
==
CUDNN_CONVOLUTION_FWD_ALGO_FFT
)
// Following code is 2d-specific, but it is fine as ftt is define only for 2d-filters
if
(
chosen_algo
==
CUDNN_CONVOLUTION_FWD_ALGO_FFT
&&
nb_dim
==
4
)
{
{
// Extract the properties of the convolution descriptor
// Extract the properties of the convolution descriptor
...
@@ -186,12 +200,12 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -186,12 +200,12 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
chosen_algo
,
chosen_algo
,
&
worksize
);
&
worksize
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
std
::
cout
<<
"here"
<<
std
::
endl
;
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv: error getting worksize: %s"
,
"GpuDnnConv: error getting worksize: %s"
,
cudnnGetErrorString
(
err
));
cudnnGetErrorString
(
err
));
return
1
;
return
1
;
}
}
workspace
=
get_work_mem
(
worksize
);
workspace
=
get_work_mem
(
worksize
);
if
(
workspace
==
NULL
&&
worksize
!=
0
)
if
(
workspace
==
NULL
&&
worksize
!=
0
)
return
1
;
return
1
;
...
@@ -208,6 +222,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -208,6 +222,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
APPLY_SPECIFIC
(
output
),
CudaNdarray_DEV_DATA
(
*
output
));
APPLY_SPECIFIC
(
output
),
CudaNdarray_DEV_DATA
(
*
output
));
}
}
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
std
::
cout
<<
"here2"
<<
std
::
endl
;
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv: error doing operation: %s"
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv: error doing operation: %s"
,
cudnnGetErrorString
(
err
));
cudnnGetErrorString
(
err
));
return
1
;
return
1
;
...
...
theano/sandbox/cuda/dnn_gi.c
浏览文件 @
6117f98b
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
int
int
APPLY_SPECIFIC
(
conv_gi
)(
CudaNdarray
*
kerns
,
CudaNdarray
*
output
,
APPLY_SPECIFIC
(
conv_gi
)(
CudaNdarray
*
kerns
,
CudaNdarray
*
output
,
CudaNdarray
*
im
,
cudnnConvolutionDescriptor_t
desc
,
CudaNdarray
*
im
,
cudnnConvolutionDescriptor_t
desc
,
float
alpha
,
float
beta
,
CudaNdarray
**
input
)
{
float
alpha
,
float
beta
,
int
nb_dim
,
CudaNdarray
**
input
)
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
CudaNdarray_HOST_DIMS
(
im
)[
1
]
!=
CudaNdarray_HOST_DIMS
(
kerns
)[
1
])
{
if
(
CudaNdarray_HOST_DIMS
(
im
)[
1
]
!=
CudaNdarray_HOST_DIMS
(
kerns
)[
1
])
{
...
@@ -12,9 +12,14 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
...
@@ -12,9 +12,14 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return
1
;
return
1
;
}
}
if
(
c_set_tensor4d
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
/* if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1) */
/* return 1; */
/* if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) */
/* return 1; */
if
(
c_set_tensorNd
(
output
,
nb_dim
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
Nd
(
kerns
,
nb_dim
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
return
1
;
#ifdef CONV_INPLACE
#ifdef CONV_INPLACE
...
@@ -22,13 +27,16 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
...
@@ -22,13 +27,16 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
*
input
=
im
;
*
input
=
im
;
Py_INCREF
(
*
input
);
Py_INCREF
(
*
input
);
#else
#else
if
(
CudaNdarray_prep_output
(
input
,
4
,
CudaNdarray_HOST_DIMS
(
im
))
!=
0
)
if
(
CudaNdarray_prep_output
(
input
,
nb_dim
,
CudaNdarray_HOST_DIMS
(
im
))
!=
0
)
return
1
;
return
1
;
if
(
beta
!=
0
.
0
&&
CudaNdarray_CopyFromCudaNdarray
(
*
input
,
im
))
if
(
beta
!=
0
.
0
&&
CudaNdarray_CopyFromCudaNdarray
(
*
input
,
im
))
return
1
;
return
1
;
#endif
#endif
if
(
c_set_tensor4d
(
*
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
/* if (c_set_tensor4d(*input, APPLY_SPECIFIC(input)) == -1) */
/* return 1; */
if
(
c_set_tensorNd
(
*
input
,
nb_dim
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
return
1
;
{
{
...
...
theano/sandbox/cuda/dnn_gw.c
浏览文件 @
6117f98b
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
int
int
APPLY_SPECIFIC
(
conv_gw
)(
CudaNdarray
*
input
,
CudaNdarray
*
output
,
APPLY_SPECIFIC
(
conv_gw
)(
CudaNdarray
*
input
,
CudaNdarray
*
output
,
CudaNdarray
*
km
,
cudnnConvolutionDescriptor_t
desc
,
CudaNdarray
*
km
,
cudnnConvolutionDescriptor_t
desc
,
float
alpha
,
float
beta
,
CudaNdarray
**
kerns
)
{
float
alpha
,
float
beta
,
int
nb_dim
,
CudaNdarray
**
kerns
)
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
CudaNdarray_HOST_DIMS
(
input
)[
1
]
!=
CudaNdarray_HOST_DIMS
(
km
)[
1
])
{
if
(
CudaNdarray_HOST_DIMS
(
input
)[
1
]
!=
CudaNdarray_HOST_DIMS
(
km
)[
1
])
{
...
@@ -12,9 +12,14 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
...
@@ -12,9 +12,14 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return
1
;
return
1
;
}
}
if
(
c_set_tensor4d
(
input
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
/* if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1) */
/* return 1; */
/* if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1) */
/* return 1; */
if
(
c_set_tensorNd
(
input
,
nb_dim
,
APPLY_SPECIFIC
(
input
))
==
-
1
)
return
1
;
return
1
;
if
(
c_set_tensor
4d
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
if
(
c_set_tensor
Nd
(
output
,
nb_dim
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
return
1
;
#ifdef CONV_INPLACE
#ifdef CONV_INPLACE
...
@@ -22,13 +27,15 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
...
@@ -22,13 +27,15 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
*
kerns
=
km
;
*
kerns
=
km
;
Py_INCREF
(
*
kerns
);
Py_INCREF
(
*
kerns
);
#else
#else
if
(
CudaNdarray_prep_output
(
kerns
,
4
,
CudaNdarray_HOST_DIMS
(
km
))
!=
0
)
if
(
CudaNdarray_prep_output
(
kerns
,
nb_dim
,
CudaNdarray_HOST_DIMS
(
km
))
!=
0
)
return
1
;
return
1
;
if
(
beta
!=
0
.
0
&&
CudaNdarray_CopyFromCudaNdarray
(
*
kerns
,
km
))
if
(
beta
!=
0
.
0
&&
CudaNdarray_CopyFromCudaNdarray
(
*
kerns
,
km
))
return
1
;
return
1
;
#endif
#endif
if
(
c_set_filter
(
*
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
/* if (c_set_filter(*kerns, APPLY_SPECIFIC(kerns)) == -1) */
/* return 1; */
if
(
c_set_filterNd
(
*
kerns
,
nb_dim
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
return
1
;
{
{
...
...
theano/sandbox/cuda/tests/test_dnn.py
浏览文件 @
6117f98b
...
@@ -13,6 +13,7 @@ from theano.tensor.signal.downsample import max_pool_2d
...
@@ -13,6 +13,7 @@ from theano.tensor.signal.downsample import max_pool_2d
from
theano.tensor.signal.downsample
import
DownsampleFactorMaxGrad
from
theano.tensor.signal.downsample
import
DownsampleFactorMaxGrad
import
theano.sandbox.cuda.dnn
as
dnn
import
theano.sandbox.cuda.dnn
as
dnn
from
theano.sandbox.cuda.basic_ops
import
GpuAllocEmpty
,
gpu_alloc_empty
from
theano.sandbox.cuda.basic_ops
import
GpuAllocEmpty
,
gpu_alloc_empty
from
theano.sandbox.cuda
import
float32_shared_constructor
as
shared
# Skip test if cuda_ndarray is not available.
# Skip test if cuda_ndarray is not available.
import
theano.sandbox.cuda
as
cuda
import
theano.sandbox.cuda
as
cuda
...
@@ -763,6 +764,58 @@ def test_dnn_conv_grad():
...
@@ -763,6 +764,58 @@ def test_dnn_conv_grad():
utt
.
verify_grad
(
dconvw
,
[
img_val
,
kern_val
,
out_val
])
utt
.
verify_grad
(
dconvw
,
[
img_val
,
kern_val
,
out_val
])
def
test_conv3d_valid
():
print
dnn
.
version
()
if
not
cuda
.
dnn
.
dnn_available
():
raise
SkipTest
(
'"3D conv not supported in cudnn v1'
)
def
run_conv3d_valid
(
inputs_shape
,
filters_shape
,
subsample
=
(
1
,
1
,
1
)):
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
'float32'
)
inputs
=
shared
(
inputs_val
)
filters
=
shared
(
filters_val
)
bias
=
shared
(
numpy
.
zeros
(
filters_shape
[
0
])
.
astype
(
'float32'
))
conv_ref
=
theano
.
tensor
.
nnet
.
conv3D
(
V
=
inputs
.
dimshuffle
(
0
,
2
,
3
,
4
,
1
),
W
=
filters
.
dimshuffle
(
0
,
2
,
3
,
4
,
1
),
b
=
bias
,
d
=
subsample
)
conv
=
dnn
.
dnn_conv3d
(
img
=
inputs
,
kerns
=
filters
,
border_mode
=
"valid"
,
subsample
=
subsample
,
conv_mode
=
'cross'
)
f_ref
=
theano
.
function
([],
conv_ref
.
dimshuffle
(
0
,
4
,
1
,
2
,
3
))
f
=
theano
.
function
([],
conv
,
mode
=
mode_with_gpu
)
res_ref
=
f_ref
()
res
=
f
()
print
res_ref
.
shape
,
res
.
shape
utt
.
assert_allclose
(
res_ref
,
res
)
run_conv3d_valid
(
inputs_shape
=
(
128
,
3
,
5
,
5
,
5
),
filters_shape
=
(
64
,
3
,
1
,
2
,
4
))
run_conv3d_valid
(
inputs_shape
=
(
16
,
4
,
20
,
12
,
15
),
filters_shape
=
(
10
,
4
,
6
,
12
,
4
),
subsample
=
(
2
,
2
,
2
))
run_conv3d_valid
(
inputs_shape
=
(
16
,
4
,
20
,
12
,
15
),
filters_shape
=
(
10
,
4
,
6
,
12
,
4
),
subsample
=
(
2
,
2
,
2
))
run_conv3d_valid
(
inputs_shape
=
(
16
,
1
,
20
,
12
,
15
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
subsample
=
(
3
,
3
,
3
))
run_conv3d_valid
(
inputs_shape
=
(
16
,
2
,
20
,
12
,
15
),
filters_shape
=
(
10
,
2
,
6
,
12
,
4
),
subsample
=
(
3
,
3
,
3
))
run_conv3d_valid
(
inputs_shape
=
(
16
,
1
,
20
,
12
,
15
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
subsample
=
(
3
,
2
,
1
))
run_conv3d_valid
(
inputs_shape
=
(
16
,
1
,
20
,
12
,
15
),
filters_shape
=
(
10
,
1
,
6
,
12
,
4
),
subsample
=
(
1
,
2
,
3
))
def
test_version
():
def
test_version
():
if
not
cuda
.
dnn
.
dnn_available
():
if
not
cuda
.
dnn
.
dnn_available
():
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论