Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
268bc917
提交
268bc917
authored
6月 18, 2015
作者:
Arnaud Bergeron
提交者:
--global
8月 04, 2015
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Extend the current pooling op to support 3d pooling.
This does not have any tests or optimizations for 3d pooling, but the 2d support still works at least.
上级
525c9c84
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
176 行增加
和
146 行删除
+176
-146
cudnn_helper.h
theano/sandbox/cuda/cudnn_helper.h
+79
-0
dnn.py
theano/sandbox/cuda/dnn.py
+97
-146
没有找到文件。
theano/sandbox/cuda/cudnn_helper.h
浏览文件 @
268bc917
...
...
@@ -197,6 +197,85 @@ cudnnConvolutionBackwardData_v2(
#define cudnnConvolutionBackwardData cudnnConvolutionBackwardData_v2
/* Compatibility shim for cuDNN releases that predate the Nd pooling API.
 * Emulates cudnnSetPoolingNdDescriptor on top of the legacy 2d-only
 * cudnnSetPoolingDescriptor.
 *
 * Only nbDims == 2 with zero padding is representable by the legacy
 * setter (it has no padding arguments), so anything else is reported
 * as CUDNN_STATUS_NOT_SUPPORTED.
 *
 * Fix: the last parameter was declared `const in strideA[]` — `in` is
 * not a type; it must be `int`.
 */
static inline cudnnStatus_t
cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
                            const cudnnPoolingMode_t mode,
                            int nbDims,
                            const int windowDimA[],
                            const int paddingA[],
                            const int strideA[])
{
  if (nbDims != 2)
    return CUDNN_STATUS_NOT_SUPPORTED;
  /* The legacy 2d setter cannot express padding at all. */
  if (paddingA[0] != 0 || paddingA[1] != 0)
    return CUDNN_STATUS_NOT_SUPPORTED;
  return cudnnSetPoolingDescriptor(poolingDesc, mode,
                                   windowDimA[0], windowDimA[1],
                                   strideA[0], strideA[1]);
}
/* Compatibility shim: emulates cudnnGetPoolingNdDescriptor on top of
 * the legacy 2d-only cudnnGetPoolingDescriptor.
 *
 * Always reports a 2d descriptor (*nbDims = 2) with zero padding,
 * since the legacy getter cannot return padding information.  The
 * caller must supply windowA/paddingA/strideA arrays with room for at
 * least 2 elements (guarded by the nbDimsRequested check).
 *
 * Fix: the body tested the undeclared name `ndDimsRequested`; the
 * parameter is `nbDimsRequested`.
 */
static inline cudnnStatus_t
cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
                            const int nbDimsRequested,
                            cudnnPoolingMode_t *mode,
                            int *nbDims,
                            int windowA[],
                            int paddingA[],
                            int strideA[])
{
  int win0, win1, str0, str1;
  cudnnStatus_t err;
  if (nbDimsRequested < 2)
    return CUDNN_STATUS_NOT_SUPPORTED;
  err = cudnnGetPoolingDescriptor(poolingDesc, mode,
                                  &win0, &win1, &str0, &str1);
  if (err != CUDNN_STATUS_SUCCESS)
    return err;
  *nbDims = 2;
  /* Legacy descriptors carry no padding, so report zeros. */
  paddingA[0] = 0;
  paddingA[1] = 0;
  windowA[0] = win0;
  windowA[1] = win1;
  strideA[0] = str0;
  strideA[1] = str1;
  return CUDNN_STATUS_SUCCESS;
}
/* Adapter giving the legacy cudnnPoolingForward (which takes no
 * alpha/beta scaling factors) the v2 signature.  Only the identity
 * scaling (alpha == 1, beta == 0) can be emulated; any other scaling
 * is rejected as CUDNN_STATUS_NOT_SUPPORTED.
 *
 * NOTE(review): alpha and beta are dereferenced as float, which
 * assumes the data type is float — confirm all call sites.
 */
static inline cudnnStatus_t
cudnnPoolingForward_v2(cudnnHandle_t handle,
                       const cudnnPoolingDescriptor_t poolingDesc,
                       const void *alpha,
                       const cudnnTensorDescriptor_t srcDesc,
                       const void *srcData,
                       const void *beta,
                       const cudnnTensorDescriptor_t destDesc,
                       void *destData)
{
  const float scale_a = *(const float *)alpha;
  const float scale_b = *(const float *)beta;
  if (scale_a != 1.0 || scale_b != 0.0)
    return CUDNN_STATUS_NOT_SUPPORTED;
  return cudnnPoolingForward(handle, poolingDesc,
                             srcDesc, srcData,
                             destDesc, destData);
}
#define cudnnPoolingForward cudnnPoolingForward_v2
/* Adapter giving the legacy cudnnPoolingBackward (which takes no
 * alpha/beta scaling factors) the v2 signature.  As with the forward
 * shim, only identity scaling (alpha == 1, beta == 0) is supported.
 *
 * NOTE(review): alpha and beta are dereferenced as float, which
 * assumes the data type is float — confirm all call sites.
 */
static inline cudnnStatus_t
cudnnPoolingBackward_v2(cudnnHandle_t handle,
                        const cudnnPoolingDescriptor_t poolingDesc,
                        const void *alpha,
                        const cudnnTensorDescriptor_t srcDesc,
                        const void *srcData,
                        const cudnnTensorDescriptor_t srcDiffDesc,
                        const void *srcDiffData,
                        const cudnnTensorDescriptor_t destDesc,
                        const void *destData,
                        const void *beta,
                        const cudnnTensorDescriptor_t destDiffDesc,
                        void *destDiffData)
{
  const float scale_a = *(const float *)alpha;
  const float scale_b = *(const float *)beta;
  if (scale_a != 1.0 || scale_b != 0.0)
    return CUDNN_STATUS_NOT_SUPPORTED;
  return cudnnPoolingBackward(handle, poolingDesc,
                              srcDesc, srcData,
                              srcDiffDesc, srcDiffData,
                              destDesc, destData,
                              destDiffDesc, destDiffData);
}
#define cudnnPoolingBackward cudnnPoolingBackward_v2
//Needed for R2 rc2
# define CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING CUDNN_POOLING_AVERAGE
#else
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
268bc917
...
...
@@ -1266,7 +1266,7 @@ class GpuDnnPoolDesc(GpuOp):
padX is the size of the left and right borders,
padY is the size of the top and bottom borders.
"""
__props__
=
(
'ws'
,
'stride'
,
'mode'
,
'pad'
)
__props__
=
(
'ws'
,
'stride'
,
'mode'
,
'pad'
,
'nd'
)
def
c_headers
(
self
):
return
[
'cudnn.h'
,
'cudnn_helper.h'
]
...
...
@@ -1283,19 +1283,23 @@ class GpuDnnPoolDesc(GpuOp):
def
do_constant_folding
(
self
,
node
):
return
False
def
__init__
(
self
,
ws
=
(
1
,
1
),
stride
=
(
1
,
1
),
mode
=
'max'
,
pad
=
(
0
,
0
)):
def
__init__
(
self
,
ws
=
(
1
,
1
),
stride
=
(
1
,
1
),
mode
=
'max'
,
pad
=
(
0
,
0
)
,
nd
=
2
):
if
mode
==
'average'
:
mode
=
'average_inc_pad'
assert
mode
in
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
self
.
mode
=
mode
assert
len
(
ws
)
==
2
assert
len
(
ws
)
==
nd
self
.
ws
=
ws
assert
len
(
stride
)
==
2
assert
len
(
stride
)
==
nd
self
.
stride
=
stride
assert
len
(
stride
)
==
2
assert
len
(
stride
)
==
nd
self
.
pad
=
pad
if
(
pad
[
0
]
!=
0
or
pad
[
1
]
!=
0
)
and
version
()
==
-
1
:
raise
RuntimeError
(
"CuDNN pooling with padding requires CuDNN v2"
)
assert
nd
in
(
2
,
3
)
if
nd
==
3
and
version
()
<
(
3000
,
3000
):
raise
RuntimeError
(
"3 pooling only supported on CuDNN v3"
)
self
.
nd
=
nd
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
...
...
@@ -1333,22 +1337,14 @@ class GpuDnnPoolDesc(GpuOp):
"descriptor:
%%
s", cudnnGetErrorString(err));
%(fail)
s
}
#ifndef CUDNN_VERSION
err = cudnnSetPoolingDescriptor(
%(desc)
s,
%(mode_flag)
s,
%(wsX)
d,
%(wsY)
d,
%(stridex)
d,
%(stridey)
d
);
#else
err = cudnnSetPooling2dDescriptor(
%(desc)
s,
%(mode_flag)
s,
%(wsX)
d,
%(wsY)
d,
%(padX)
d,
%(padY)
d,
%(stridex)
d,
%(stridey)
d
);
#endif
{
int win[
%(nd)
d] = {
%(win)
s};
int pad[
%(nd)
d] = {
%(pad)
s};
int str[
%(nd)
d] = {
%(str)
s};
err = cudnnSetPoolingNdDescriptor(
%(desc)
s,
%(mode_flag)
s,
%(nd)
d,
win, pad, str);
}
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor:
%%
s",
cudnnGetErrorString(err));
...
...
@@ -1356,46 +1352,49 @@ class GpuDnnPoolDesc(GpuOp):
}
}
"""
%
dict
(
name
=
name
,
desc
=
desc
,
mode_flag
=
mode_flag
,
fail
=
sub
[
'fail'
],
wsX
=
self
.
ws
[
0
],
wsY
=
self
.
ws
[
1
]
,
stridex
=
self
.
stride
[
0
],
stridey
=
self
.
stride
[
1
]
,
padX
=
self
.
pad
[
0
],
padY
=
self
.
pad
[
1
]
)
nd
=
self
.
nd
,
win
=
', '
.
join
(
str
(
w
)
for
w
in
self
.
ws
)
,
pad
=
', '
.
join
(
str
(
p
)
for
p
in
self
.
pad
)
,
str
=
', '
.
join
(
str
(
s
)
for
s
in
self
.
stride
)
)
def
c_code_cache_version
(
self
):
return
(
2
,
version
())
return
(
3
,
version
())
class
GpuDnnPool
(
DnnBase
):
"""
Pooling.
:param img: the image 4d tensor.
:param img: the image 4d
or 5d
tensor.
:param desc: the pooling descriptor.
"""
__props__
=
()
def
make_node
(
self
,
img
,
desc
):
img
=
as_cuda_ndarray_variable
(
img
)
if
img
.
type
.
ndim
!=
4
:
raise
TypeError
(
'img must be 4D tensor'
)
if
not
isinstance
(
desc
.
type
,
CDataType
)
\
or
desc
.
type
.
ctype
!=
'cudnnPoolingDescriptor_t'
:
raise
TypeError
(
'desc must be cudnnPoolingDescriptor_t'
)
return
Apply
(
self
,
[
img
,
desc
],
[
img
.
type
()])
dop
=
desc
.
owner
.
op
e_ndim
=
dop
.
nd
+
2
# 4 or 5
if
img
.
type
.
ndim
!=
e_ndim
:
raise
TypeError
(
'img must be
%
dD tensor'
%
e_ndim
)
return
Apply
(
self
,
[
img
,
desc
],
[
img
.
type
()])
def
infer_shape
(
self
,
node
,
shape
):
desc
=
node
.
inputs
[
1
]
.
owner
.
op
kh
,
kw
=
desc
.
ws
sh
,
sw
=
desc
.
stride
padh
,
padw
=
desc
.
pad
return
[(
shape
[
0
][
0
],
shape
[
0
][
1
],
(
shape
[
0
][
2
]
+
2
*
padh
-
kh
)
//
sh
+
1
,
(
shape
[
0
][
3
]
+
2
*
padw
-
kw
)
//
sw
+
1
)]
nd
=
desc
.
nd
w
=
desc
.
ws
s
=
desc
.
stride
p
=
desc
.
pad
ret
=
[
shape
[
0
][
0
],
shape
[
0
][
1
],
(
shape
[
0
][
2
]
+
2
*
p
[
0
]
-
w
[
0
])
//
s
[
0
]
+
1
,
(
shape
[
0
][
3
]
+
2
*
p
[
1
]
-
w
[
1
])
//
s
[
1
]
+
1
]
if
nd
==
3
:
ret
.
append
((
shape
[
0
][
4
]
+
2
*
p
[
2
]
-
w
[
2
])
//
s
[
2
]
+
1
)
return
[
ret
]
def
c_support_code_struct
(
self
,
node
,
name
):
return
"""
...
...
@@ -1409,12 +1408,12 @@ cudnnStatus_t err%(name)s;
input
%(name)
s = NULL;
output
%(name)
s = NULL;
if ((err
%(name)
s = cudnnCreateTensorDescriptor(&input
%(name)
s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor
4d
descriptor "
PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
"(inp):
%%
s", cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
if ((err
%(name)
s = cudnnCreateTensorDescriptor(&output
%(name)
s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor
4d
descriptor "
PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
"(out):
%%
s", cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
...
...
@@ -1430,65 +1429,51 @@ if (output%(name)s != NULL) { cudnnDestroyTensorDescriptor(output%(name)s); }
desc
=
inputs
[
1
]
out
,
=
outputs
set_in
=
c_set_tensor4d
(
inputs
[
0
],
"input"
+
str
(
name
),
'err'
+
name
,
sub
[
'fail'
])
set_out
=
c_set_tensor4d
(
out
,
"output"
+
str
(
name
),
'err'
+
name
,
sub
[
'fail'
])
return
"""
cudnnStatus_t err
%(name)
s;
int
%(out)
s_dims[
4
];
int
%(out)
s_dims[
5
];
if (!CudaNdarray_is_c_contiguous(
%(input)
s)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
%(fail)
s
}
%(set_in)
s
if (c_set_tensorNd(
%(input)
s,
%(input)
s->nd,
%(input_desc)
s) != 0)
%(fail)
s
cudnnPoolingMode_t mode;
int wsX, wsY, vpad, hpad, strideX, strideY;
#ifndef CUDNN_VERSION
err
%(name)
s = cudnnGetPoolingDescriptor(
%(desc)
s, &mode,
&wsX, &wsY,
&strideX, &strideY);
#else
err
%(name)
s = cudnnGetPooling2dDescriptor(
%(desc)
s, &mode,
&wsX, &wsY,
&vpad, &hpad,
&strideX, &strideY);
#endif
int win[3];
int pad[3];
int str[3];
int ndims;
err
%(name)
s = cudnnGetPoolingNdDescriptor(
%(desc)
s, 3,
&mode, &ndims,
win, pad, str);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnPool: error doing cudnnGetPoolingDescriptor operation:
%%
s",
"GpuDnnPool: error doing cudnnGetPooling
Nd
Descriptor operation:
%%
s",
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
%(out)
s_dims[0] = CudaNdarray_HOST_DIMS(
%(input)
s)[0];
%(out)
s_dims[1] = CudaNdarray_HOST_DIMS(
%(input)
s)[1];
%(out)
s_dims[2] = (CudaNdarray_HOST_DIMS(
%(input)
s)[2] + (vpad*2) - wsX) / strideX + 1;
%(out)
s_dims[3] = (CudaNdarray_HOST_DIMS(
%(input)
s)[3] + (hpad*2) - wsY) / strideY + 1;
%(out)
s_dims[2] = (CudaNdarray_HOST_DIMS(
%(input)
s)[2] + (pad[0]*2) - win[0]) / str[0] + 1;
%(out)
s_dims[3] = (CudaNdarray_HOST_DIMS(
%(input)
s)[3] + (pad[1]*2) - win[1]) / str[1] + 1;
if (ndims == 3)
%(out)
s_dims[4] = (CudaNdarray_HOST_DIMS(
%(input)
s)[4] + (pad[2]*2) - win[2]) / str[2] + 1;
if (CudaNdarray_prep_output(&
%(out)
s,
4
,
%(out)
s_dims) != 0)
if (CudaNdarray_prep_output(&
%(out)
s,
ndims+2
,
%(out)
s_dims) != 0)
{
%(fail)
s
}
%(set_out)
s
#ifndef CUDNN_VERSION
err
%(name)
s = cudnnPoolingForward(
_handle,
%(desc)
s,
%(input_desc)
s, CudaNdarray_DEV_DATA(
%(input)
s),
%(output_desc)
s, CudaNdarray_DEV_DATA(
%(out)
s)
);
#else
if (c_set_tensorNd(
%(out)
s,
%(out)
s->nd,
%(output_desc)
s) != 0)
%(fail)
s
{
const float alpha = 1;
const float beta = 0;
...
...
@@ -1501,7 +1486,6 @@ _handle,
%(output_desc)
s, CudaNdarray_DEV_DATA(
%(out)
s)
);
}
#endif
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnPool: error doing cudnnPoolingForward operation:
%%
s",
...
...
@@ -1509,8 +1493,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
%(fail)
s
}
"""
%
dict
(
out
=
out
,
desc
=
desc
,
fail
=
sub
[
'fail'
],
name
=
name
,
set_in
=
set_in
,
set_out
=
set_out
,
input
=
inputs
[
0
],
name
=
name
,
input
=
inputs
[
0
],
input_desc
=
"input"
+
name
,
output_desc
=
"output"
+
name
)
...
...
@@ -1531,7 +1514,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return
[[
1
],
[
0
]]
def
c_code_cache_version
(
self
):
return
(
6
,
version
())
return
(
7
,
version
())
class
GpuDnnPoolGrad
(
DnnBase
):
...
...
@@ -1546,21 +1529,23 @@ class GpuDnnPoolGrad(DnnBase):
__props__
=
()
def
make_node
(
self
,
inp
,
out
,
inp_grad
,
desc
):
if
not
isinstance
(
desc
.
type
,
CDataType
)
\
or
desc
.
type
.
ctype
!=
'cudnnPoolingDescriptor_t'
:
raise
TypeError
(
'desc must be cudnnPoolingDescriptor_t'
)
nd
=
desc
.
owner
.
op
.
nd
+
2
# 4 or 5
inp
=
as_cuda_ndarray_variable
(
inp
)
if
inp
.
type
.
ndim
!=
4
:
raise
TypeError
(
'inp must be
4D tensor'
)
if
inp
.
type
.
ndim
!=
nd
:
raise
TypeError
(
'inp must be
%
dD tensor'
%
(
nd
,)
)
inp_grad
=
as_cuda_ndarray_variable
(
inp_grad
)
if
inp_grad
.
type
.
ndim
!=
4
:
raise
TypeError
(
'inp_grad must be
4D tensor'
)
if
inp_grad
.
type
.
ndim
!=
nd
:
raise
TypeError
(
'inp_grad must be
%
dD tensor'
%
(
nd
,)
)
out
=
as_cuda_ndarray_variable
(
out
)
if
out
.
type
.
ndim
!=
4
:
raise
TypeError
(
'out must be 4D tensor'
)
if
not
isinstance
(
desc
.
type
,
CDataType
)
\
or
desc
.
type
.
ctype
!=
'cudnnPoolingDescriptor_t'
:
raise
TypeError
(
'desc must be cudnnPoolingDescriptor_t'
)
if
out
.
type
.
ndim
!=
nd
:
raise
TypeError
(
'out must be
%
dD tensor'
%
(
nd
,))
return
Apply
(
self
,
[
inp
,
out
,
inp_grad
,
desc
],
[
inp
.
type
()])
...
...
@@ -1621,18 +1606,6 @@ if (output_grad%(name)s != NULL) { cudnnDestroyTensorDescriptor(output_grad%(nam
out
,
inp
,
inp_grad
,
desc
=
inputs
out_grad
,
=
outputs
set_in
=
"
\n
"
.
join
([
c_set_tensor4d
(
inp
,
"input"
+
name
,
'err'
+
name
,
sub
[
'fail'
]),
c_set_tensor4d
(
inp_grad
,
"input_grad"
+
name
,
'err'
+
name
,
sub
[
'fail'
]),
c_set_tensor4d
(
out
,
"output"
+
name
,
'err'
+
name
,
sub
[
'fail'
])
])
set_out
=
c_set_tensor4d
(
out
,
"output_grad"
+
name
,
'err'
+
name
,
sub
[
'fail'
])
return
"""
cudnnStatus_t err
%(name)
s;
...
...
@@ -1654,25 +1627,23 @@ if (!CudaNdarray_is_c_contiguous(%(output)s)) {
%(fail)
s
}
%(set_in)
s
if (c_set_tensorNd(
%(input)
s,
%(input)
s->nd,
%(input_desc)
s) != 0)
%(fail)
s
if (c_set_tensorNd(
%(input_grad)
s,
%(input_grad)
s->nd,
%(input_grad_desc)
s) != 0)
%(fail)
s
if (c_set_tensorNd(
%(output)
s,
%(output)
s->nd,
%(output_desc)
s) != 0)
%(fail)
s
if (CudaNdarray_prep_output(&
%(output_grad)
s, 4,
if (CudaNdarray_prep_output(&
%(output_grad)
s,
%(output)
s->nd,
CudaNdarray_HOST_DIMS(
%(output)
s)) != 0)
{
%(fail)
s
}
%(set_out)
s
#ifndef CUDNN_VERSION
err
%(name)
s = cudnnPoolingBackward(
_handle,
%(desc)
s,
%(input_desc)
s, CudaNdarray_DEV_DATA(
%(input)
s),
%(input_grad_desc)
s, CudaNdarray_DEV_DATA(
%(input_grad)
s),
%(output_desc)
s, CudaNdarray_DEV_DATA(
%(output)
s),
%(output_grad_desc)
s, CudaNdarray_DEV_DATA(
%(output_grad)
s)
);
#else
if (c_set_tensorNd(
%(output_grad)
s,
%(output_grad)
s->nd,
%(output_grad_desc)
s) != 0)
%(fail)
s
{
const float alpha = 1;
const float beta = 0;
...
...
@@ -1687,51 +1658,28 @@ _handle,
%(output_grad_desc)
s, CudaNdarray_DEV_DATA(
%(output_grad)
s)
);
}
#endif
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnPoolGrad: error doing operation:
%%
s. "
"input.shape=(
%%
d,
%%
d,
%%
d,
%%
d) "
"input_grad.shape=(
%%
d,
%%
d,
%%
d,
%%
d) "
"output.shape=(
%%
d,
%%
d,
%%
d,
%%
d) "
"output_grad.shape=(
%%
d,
%%
d,
%%
d,
%%
d)",
cudnnGetErrorString(err
%(name)
s),
CudaNdarray_HOST_DIMS(
%(input)
s)[0],
CudaNdarray_HOST_DIMS(
%(input)
s)[1],
CudaNdarray_HOST_DIMS(
%(input)
s)[2],
CudaNdarray_HOST_DIMS(
%(input)
s)[3],
CudaNdarray_HOST_DIMS(
%(input_grad)
s)[0],
CudaNdarray_HOST_DIMS(
%(input_grad)
s)[1],
CudaNdarray_HOST_DIMS(
%(input_grad)
s)[2],
CudaNdarray_HOST_DIMS(
%(input_grad)
s)[3],
CudaNdarray_HOST_DIMS(
%(output)
s)[0],
CudaNdarray_HOST_DIMS(
%(output)
s)[1],
CudaNdarray_HOST_DIMS(
%(output)
s)[2],
CudaNdarray_HOST_DIMS(
%(output)
s)[3],
CudaNdarray_HOST_DIMS(
%(output_grad)
s)[0],
CudaNdarray_HOST_DIMS(
%(output_grad)
s)[1],
CudaNdarray_HOST_DIMS(
%(output_grad)
s)[2],
CudaNdarray_HOST_DIMS(
%(output_grad)
s)[3]
);
%(fail)
s
"GpuDnnPoolGrad: error doing operation:
%%
s.",
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
"""
%
dict
(
output_grad
=
out_grad
,
desc
=
desc
,
fail
=
sub
[
'fail'
],
name
=
name
,
set_in
=
set_in
,
set_out
=
set_out
,
input
=
inp
,
input_grad
=
inp_grad
,
output
=
out
,
fail
=
sub
[
'fail'
],
name
=
name
,
input
=
inp
,
input_grad
=
inp_grad
,
output
=
out
,
input_desc
=
"input"
+
name
,
input_grad_desc
=
"input_grad"
+
name
,
output_desc
=
"output"
+
name
,
output_grad_desc
=
"output_grad"
+
name
)
def
c_code_cache_version
(
self
):
return
(
5
,
version
())
return
(
6
,
version
())
def
infer_shape
(
self
,
node
,
shape
):
return
[
shape
[
0
]]
def
dnn_pool
(
img
,
ws
,
stride
=
(
1
,
1
),
mode
=
'max'
,
pad
=
(
0
,
0
)):
def
dnn_pool
(
img
,
ws
,
stride
=
(
1
,
1
),
mode
=
'max'
,
pad
=
(
0
,
0
)
,
nd
=
2
):
"""
GPU pooling using cuDNN from NVIDIA.
...
...
@@ -1746,6 +1694,9 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
:param pad: (padX, padY) padding information.
padX is the size of the left and right borders,
padY is the size of the top and bottom borders.
:param nd: number of pooling dimensions; can be 2 or 3 for 2d or 3d pooling.
If set to 3, all other params (except mode) must have an extra
dimension to match. 3 is only available with cuDNN v3.
:warning: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not
...
...
@@ -1753,7 +1704,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
:note: This Op implements the ignore_border=True of max_pool_2d.
"""
img
=
gpu_contiguous
(
img
)
desc
=
GpuDnnPoolDesc
(
ws
=
ws
,
stride
=
stride
,
mode
=
mode
,
pad
=
pad
)()
desc
=
GpuDnnPoolDesc
(
ws
=
ws
,
stride
=
stride
,
mode
=
mode
,
pad
=
pad
,
nd
=
nd
)()
return
GpuDnnPool
()(
img
,
desc
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论