Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
7ddf071c
提交
7ddf071c
authored
3月 24, 2016
作者:
Frédéric Bastien
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4256 from harmdevries89/gpupool_newbackend
Gpupool newbackend
上级
963eac17
720355c3
隐藏空白字符变更
内嵌
并排
正在显示
5 个修改的文件
包含
234 行增加
和
95 行删除
+234
-95
test_dnn.py
theano/sandbox/cuda/tests/test_dnn.py
+19
-14
dnn.py
theano/sandbox/gpuarray/dnn.py
+101
-58
dnn_pool.c
theano/sandbox/gpuarray/dnn_pool.c
+27
-8
dnn_pool_grad.c
theano/sandbox/gpuarray/dnn_pool_grad.c
+32
-2
test_dnn.py
theano/sandbox/gpuarray/tests/test_dnn.py
+55
-13
没有找到文件。
theano/sandbox/cuda/tests/test_dnn.py
浏览文件 @
7ddf071c
...
@@ -364,28 +364,32 @@ def test_pooling_with_tensor_vars():
...
@@ -364,28 +364,32 @@ def test_pooling_with_tensor_vars():
cast_to_output_type
=
False
,
cast_to_output_type
=
False
,
mode
=
mode_with_gpu
)
mode
=
mode_with_gpu
)
out2
=
pool_2d_i2n
(
x
,
ds
=
(
2
,
2
),
strides
=
(
1
,
1
),
pad
=
(
0
,
0
),
pool_function
=
T
.
max
)
mode_without_gpu2
=
mode_without_gpu
.
including
()
mode_without_gpu2
=
mode_without_gpu
.
including
()
mode_without_gpu2
.
check_isfinite
=
False
mode_without_gpu2
.
check_isfinite
=
False
f
1
=
theano
.
function
([
x
],
fn
(
x
),
mode
=
mode_with_gpu
)
f
_gpu
=
theano
.
function
([
x
],
fn
(
x
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPool
)
assert
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPool
)
for
node
in
f1
.
maker
.
fgraph
.
apply_nodes
])
for
node
in
f_gpu
.
maker
.
fgraph
.
apply_nodes
])
f2
=
theano
.
function
([
x
],
out2
,
mode
=
mode_without_gpu2
)
assert
not
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPool
)
i
=
1
for
node
in
f2
.
maker
.
fgraph
.
apply_nodes
])
for
shp
in
[(
1
,
10
,
100
,
100
),
for
shp
in
[(
1
,
10
,
100
,
100
),
(
1
,
3
,
99
,
99
),
(
1
,
3
,
99
,
99
),
(
32
,
1
,
147
,
197
),
(
32
,
1
,
147
,
197
)]:
]:
data
=
numpy
.
random
.
normal
(
0
,
1
,
shp
)
.
astype
(
"float32"
)
data
=
numpy
.
random
.
normal
(
0
,
1
,
shp
)
.
astype
(
"float32"
)
a
=
f1
(
data
)
.
__array__
()
out
=
pool_2d_i2n
(
x
,
ds
=
(
i
,
i
),
strides
=
(
1
,
1
),
pad
=
(
0
,
0
),
b
=
f2
(
data
)
.
__array__
()
pool_function
=
T
.
max
)
f_cpu
=
theano
.
function
([
x
],
out
,
mode
=
mode_without_gpu2
)
assert
not
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPool
)
for
node
in
f_cpu
.
maker
.
fgraph
.
apply_nodes
])
# Change the window size dynamically for gpu op
ws
.
set_value
(
numpy
.
array
([
i
,
i
])
.
astype
(
'int32'
))
a
=
f_gpu
(
data
)
.
__array__
()
b
=
f_cpu
(
data
)
.
__array__
()
utt
.
assert_allclose
(
a
,
b
)
utt
.
assert_allclose
(
a
,
b
)
i
+=
1
def
test_old_pool_interface
():
def
test_old_pool_interface
():
...
@@ -745,6 +749,7 @@ def test_dnn_tag():
...
@@ -745,6 +749,7 @@ def test_dnn_tag():
class
TestDnnInferShapes
(
utt
.
InferShapeTester
):
class
TestDnnInferShapes
(
utt
.
InferShapeTester
):
def
setUp
(
self
):
def
setUp
(
self
):
super
(
TestDnnInferShapes
,
self
)
.
setUp
()
super
(
TestDnnInferShapes
,
self
)
.
setUp
()
self
.
mode
=
mode_with_gpu
self
.
mode
=
mode_with_gpu
...
...
theano/sandbox/gpuarray/dnn.py
浏览文件 @
7ddf071c
...
@@ -142,6 +142,7 @@ dnn_available.msg = None
...
@@ -142,6 +142,7 @@ dnn_available.msg = None
class
DnnBase
(
COp
):
class
DnnBase
(
COp
):
"""
"""
Creates a handle for cudnn and pulls in the cudnn libraries and headers.
Creates a handle for cudnn and pulls in the cudnn libraries and headers.
...
@@ -255,6 +256,7 @@ version.v = None
...
@@ -255,6 +256,7 @@ version.v = None
class
GpuDnnConvDesc
(
COp
):
class
GpuDnnConvDesc
(
COp
):
"""
"""
This Op builds a convolution descriptor for use in the other convolution
This Op builds a convolution descriptor for use in the other convolution
operations.
operations.
...
@@ -388,6 +390,7 @@ def ensure_dt(val, default, name, dtype):
...
@@ -388,6 +390,7 @@ def ensure_dt(val, default, name, dtype):
class
GpuDnnConv
(
DnnBase
):
class
GpuDnnConv
(
DnnBase
):
"""
"""
The forward convolution.
The forward convolution.
...
@@ -555,6 +558,7 @@ class GpuDnnConv(DnnBase):
...
@@ -555,6 +558,7 @@ class GpuDnnConv(DnnBase):
class
GpuDnnConvGradW
(
DnnBase
):
class
GpuDnnConvGradW
(
DnnBase
):
"""
"""
The convolution gradient with respect to the weights.
The convolution gradient with respect to the weights.
...
@@ -675,6 +679,7 @@ class GpuDnnConvGradW(DnnBase):
...
@@ -675,6 +679,7 @@ class GpuDnnConvGradW(DnnBase):
class
GpuDnnConvGradI
(
DnnBase
):
class
GpuDnnConvGradI
(
DnnBase
):
"""
"""
The convolution gradient with respect to the inputs.
The convolution gradient with respect to the inputs.
...
@@ -943,6 +948,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
...
@@ -943,6 +948,7 @@ def dnn_gradinput(kerns, topgrad, img_shp, border_mode='valid',
class
GpuDnnPoolDesc
(
Op
):
class
GpuDnnPoolDesc
(
Op
):
"""
"""
This Op builds a pooling descriptor for use in the other
This Op builds a pooling descriptor for use in the other
pooling operations.
pooling operations.
...
@@ -1061,69 +1067,87 @@ class GpuDnnPoolDesc(Op):
...
@@ -1061,69 +1067,87 @@ class GpuDnnPoolDesc(Op):
class
GpuDnnPool
(
DnnBase
):
class
GpuDnnPool
(
DnnBase
):
"""
Pooling.
"""
Parameters
Parameters
----------
----------
img
img
The image 4d tensor.
The image 4d or 5d tensor.
desc
Parameters
The pooling descriptor.
----------
ws : tensor variable
Window size.
stride : tensor variable
(dx, dy) or (dx, dy, dz).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
The old deprecated name 'average' corresponds to 'average_inc_pad'.
pad : tensor
(padX, padY) or (padX, padY, padZ)
"""
"""
__props__
=
()
__props__
=
(
'mode'
,
)
def
__init__
(
self
):
def
__init__
(
self
,
mode
=
'max'
):
DnnBase
.
__init__
(
self
,
[
"dnn_pool.c"
],
"APPLY_SPECIFIC(dnn_pool)"
)
DnnBase
.
__init__
(
self
,
[
"dnn_pool.c"
],
"APPLY_SPECIFIC(dnn_pool)"
)
if
mode
==
'average'
:
mode
=
'average_inc_pad'
assert
mode
in
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
self
.
mode
=
mode
def
make_node
(
self
,
img
,
desc
):
def
get_op_params
(
self
):
img
=
as_gpuarray_variable
(
img
,
infer_context_name
(
img
))
if
self
.
mode
==
'max'
:
mode_flag
=
'CUDNN_POOLING_MAX'
elif
self
.
mode
==
"average_inc_pad"
:
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif
self
.
mode
==
"average_exc_pad"
:
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if
desc
.
owner
is
not
None
:
return
[(
'MODE_FLAG'
,
mode_flag
)]
e_ndim
=
desc
.
owner
.
op
.
get_ndim
()
+
2
if
img
.
type
.
ndim
!=
e_ndim
:
def
make_node
(
self
,
img
,
ws
,
stride
,
pad
):
raise
TypeError
(
'img must be
%
dD tensor'
%
(
e_ndim
,))
ctx_name
=
infer_context_name
(
img
)
img
=
as_gpuarray_variable
(
img
,
ctx_name
)
if
(
not
isinstance
(
desc
.
type
,
CDataType
)
or
ws
=
tensor
.
as_tensor_variable
(
ws
)
desc
.
type
.
ctype
!=
'cudnnPoolingDescriptor_t'
):
stride
=
tensor
.
as_tensor_variable
(
stride
)
raise
TypeError
(
'desc must be cudnnPoolingDescriptor_t'
)
pad
=
tensor
.
as_tensor_variable
(
pad
)
assert
ws
.
type
.
ndim
==
stride
.
type
.
ndim
and
ws
.
type
.
ndim
==
pad
.
type
.
ndim
assert
ws
.
type
.
ndim
==
1
return
Apply
(
self
,
[
img
,
desc
],
[
img
.
type
()])
return
Apply
(
self
,
[
img
,
ws
,
stride
,
pad
],
[
img
.
type
()])
def
infer_shape
(
self
,
node
,
shape
):
def
infer_shape
(
self
,
node
,
shape
):
desc
=
node
.
inputs
[
1
]
.
owner
.
op
w
=
node
.
inputs
[
1
]
w
=
desc
.
ws
s
=
node
.
inputs
[
2
]
s
=
desc
.
stride
p
=
node
.
inputs
[
3
]
p
=
desc
.
pad
res
=
[
shape
[
0
][
0
],
shape
[
0
][
1
],
res
=
[
shape
[
0
][
0
],
shape
[
0
][
1
],
(
shape
[
0
][
2
]
+
2
*
p
[
0
]
-
w
[
0
])
//
s
[
0
]
+
1
,
(
shape
[
0
][
2
]
+
2
*
p
[
0
]
-
w
[
0
])
//
s
[
0
]
+
1
,
(
shape
[
0
][
3
]
+
2
*
p
[
1
]
-
w
[
1
])
//
s
[
1
]
+
1
(
shape
[
0
][
3
]
+
2
*
p
[
1
]
-
w
[
1
])
//
s
[
1
]
+
1
]
]
if
len
(
w
)
>
2
:
if
node
.
inputs
[
0
]
.
ndim
==
5
:
res
.
append
((
shape
[
0
][
4
]
+
2
*
p
[
2
]
-
w
[
2
])
//
s
[
2
]
+
1
)
res
.
append
((
shape
[
0
][
4
]
+
2
*
p
[
2
]
-
w
[
2
])
//
s
[
2
]
+
1
)
return
[
res
]
return
[
res
]
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
img
,
desc
=
inp
img
,
ws
,
stride
,
pad
=
inp
grad
,
=
grads
grad
,
=
grads
grad
=
gpu_contiguous
(
grad
)
grad
=
gpu_contiguous
(
grad
)
out
=
self
(
img
,
desc
)
out
=
self
(
img
,
ws
,
stride
,
pad
)
g_out
=
GpuDnnPoolGrad
(
)(
img
,
out
,
grad
,
desc
)
g_out
=
GpuDnnPoolGrad
(
mode
=
self
.
mode
)(
img
,
out
,
grad
,
ws
,
stride
,
pad
)
return
g_out
,
theano
.
gradient
.
DisconnectedType
()()
return
g_out
,
theano
.
gradient
.
DisconnectedType
()()
,
theano
.
gradient
.
DisconnectedType
()(),
theano
.
gradient
.
DisconnectedType
()()
def
connection_pattern
(
self
,
node
):
def
connection_pattern
(
self
,
node
):
# not connected to
desc
# not connected to
parameters
return
[[
1
],
[
0
]]
return
[[
1
],
[
0
]
,
[
0
],
[
0
]
]
class
GpuDnnPoolGrad
(
DnnBase
):
class
GpuDnnPoolGrad
(
DnnBase
):
"""
"""
The pooling gradient.
The pooling gradient.
...
@@ -1135,40 +1159,56 @@ class GpuDnnPoolGrad(DnnBase):
...
@@ -1135,40 +1159,56 @@ class GpuDnnPoolGrad(DnnBase):
The output of the pooling in the forward.
The output of the pooling in the forward.
out_grad
out_grad
Same size as out, but is the corresponding gradient information.
Same size as out, but is the corresponding gradient information.
desc
ws : tensor variable
The pooling descriptor.
Window size.
stride : tensor variable
(dx, dy) or (dx, dy, dz).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
The old deprecated name 'average' corresponds to 'average_inc_pad'.
pad : tensor
(padX, padY) or (padX, padY, padZ)
"""
"""
__props__
=
()
__props__
=
(
'mode'
,
)
def
__init__
(
self
):
def
__init__
(
self
,
mode
=
'max'
):
DnnBase
.
__init__
(
self
,
[
"dnn_pool_grad.c"
],
DnnBase
.
__init__
(
self
,
[
"dnn_pool_grad.c"
],
"APPLY_SPECIFIC(dnn_pool_grad)"
)
"APPLY_SPECIFIC(dnn_pool_grad)"
)
if
mode
==
'average'
:
mode
=
'average_inc_pad'
assert
mode
in
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
self
.
mode
=
mode
def
get_op_params
(
self
):
if
self
.
mode
==
'max'
:
mode_flag
=
'CUDNN_POOLING_MAX'
elif
self
.
mode
==
"average_inc_pad"
:
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif
self
.
mode
==
"average_exc_pad"
:
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
def
make_node
(
self
,
inp
,
out
,
out_grad
,
desc
):
return
[(
'MODE_FLAG'
,
mode_flag
)]
def
make_node
(
self
,
inp
,
out
,
out_grad
,
ws
,
stride
,
pad
):
ctx_name
=
infer_context_name
(
inp
,
out
,
out_grad
)
ctx_name
=
infer_context_name
(
inp
,
out
,
out_grad
)
inp
=
as_gpuarray_variable
(
inp
,
ctx_name
)
inp
=
as_gpuarray_variable
(
inp
,
ctx_name
)
assert
(
inp
.
ndim
in
[
4
,
5
])
out_grad
=
as_gpuarray_variable
(
out_grad
,
ctx_name
)
out_grad
=
as_gpuarray_variable
(
out_grad
,
ctx_name
)
assert
(
out_grad
.
ndim
in
[
4
,
5
])
out
=
as_gpuarray_variable
(
out
,
ctx_name
)
out
=
as_gpuarray_variable
(
out
,
ctx_name
)
assert
(
out
.
ndim
in
[
4
,
5
])
if
desc
.
owner
is
not
None
:
assert
(
out_grad
.
ndim
==
inp
.
ndim
)
nd
=
desc
.
owner
.
op
.
get_ndim
()
+
2
assert
(
inp
.
ndim
==
out
.
ndim
)
if
inp
.
type
.
ndim
!=
nd
:
raise
TypeError
(
'inp must be
%
dD tensor'
%
(
nd
,))
if
out_grad
.
type
.
ndim
!=
nd
:
ws
=
tensor
.
as_tensor_variable
(
ws
)
raise
TypeError
(
'out_grad must be
%
dD tensor'
%
(
nd
,))
stride
=
tensor
.
as_tensor_variable
(
stride
)
pad
=
tensor
.
as_tensor_variable
(
pad
)
assert
ws
.
type
.
ndim
==
stride
.
type
.
ndim
and
ws
.
type
.
ndim
==
pad
.
type
.
ndim
assert
ws
.
type
.
ndim
==
1
if
out
.
type
.
ndim
!=
nd
:
return
Apply
(
self
,
[
inp
,
out
,
out_grad
,
ws
,
stride
,
pad
],
[
inp
.
type
()])
raise
TypeError
(
'out must be
%
dD tensor'
%
(
nd
,))
if
(
not
isinstance
(
desc
.
type
,
CDataType
)
or
desc
.
type
.
ctype
!=
'cudnnPoolingDescriptor_t'
):
raise
TypeError
(
'desc must be cudnnPoolingDescriptor_t'
)
return
Apply
(
self
,
[
inp
,
out
,
out_grad
,
desc
],
[
inp
.
type
()])
def
infer_shape
(
self
,
node
,
shape
):
def
infer_shape
(
self
,
node
,
shape
):
return
[
shape
[
0
]]
return
[
shape
[
0
]]
...
@@ -1206,11 +1246,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
...
@@ -1206,11 +1246,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
"""
"""
img
=
gpu_contiguous
(
img
)
img
=
gpu_contiguous
(
img
)
desc
=
GpuDnnPoolDesc
(
ws
=
ws
,
stride
=
stride
,
mode
=
mode
,
pad
=
pad
)()
return
GpuDnnPool
(
mode
=
mode
)(
img
,
ws
,
stride
,
pad
)
return
GpuDnnPool
()(
img
,
desc
)
class
GpuDnnSoftmaxBase
(
DnnBase
):
class
GpuDnnSoftmaxBase
(
DnnBase
):
"""
"""
Op for the cuDNN Softmax.
Op for the cuDNN Softmax.
...
@@ -1263,6 +1303,7 @@ class GpuDnnSoftmaxBase(DnnBase):
...
@@ -1263,6 +1303,7 @@ class GpuDnnSoftmaxBase(DnnBase):
class
GpuDnnSoftmax
(
GpuDnnSoftmaxBase
):
class
GpuDnnSoftmax
(
GpuDnnSoftmaxBase
):
"""
"""
Op for the cuDNN Softmax.
Op for the cuDNN Softmax.
...
@@ -1296,6 +1337,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
...
@@ -1296,6 +1337,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
class
GpuDnnSoftmaxGrad
(
GpuDnnSoftmaxBase
):
class
GpuDnnSoftmaxGrad
(
GpuDnnSoftmaxBase
):
"""
"""
Op for the cuDNN SoftmaxGrad.
Op for the cuDNN SoftmaxGrad.
...
@@ -1467,11 +1509,12 @@ def local_pool_dnn_grad_stride(node, ctx_name):
...
@@ -1467,11 +1509,12 @@ def local_pool_dnn_grad_stride(node, ctx_name):
pad
=
node
.
op
.
padding
pad
=
node
.
op
.
padding
mode
=
node
.
op
.
mode
mode
=
node
.
op
.
mode
desc
=
GpuDnnPoolDesc
(
ws
=
ds
,
stride
=
st
,
mode
=
mode
,
pad
=
pad
)()
return
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
return
GpuDnnPoolGrad
()(
gpu_contiguous
(
inp
),
gpu_contiguous
(
out
),
gpu_contiguous
(
out
),
gpu_contiguous
(
out_grad
),
gpu_contiguous
(
out_grad
),
ds
,
desc
)
st
,
pad
)
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
...
@@ -1491,11 +1534,10 @@ def local_avg_pool_dnn_grad_stride(node, ctx_name):
...
@@ -1491,11 +1534,10 @@ def local_avg_pool_dnn_grad_stride(node, ctx_name):
cg
=
gpu_contiguous
(
out_grad
)
cg
=
gpu_contiguous
(
out_grad
)
desc
=
GpuDnnPoolDesc
(
ws
=
ds
,
stride
=
st
,
mode
=
mode
,
pad
=
pad
)()
# We reuse cg because CuDNN does not use the value of the `out`
# We reuse cg because CuDNN does not use the value of the `out`
# argument but still checks its shape for average pooling. This
# argument but still checks its shape for average pooling. This
# has been observed in v2 and v3 as far as I know.
# has been observed in v2 and v3 as far as I know.
return
GpuDnnPoolGrad
(
)(
gpu_contiguous
(
inp
),
cg
,
cg
,
desc
)
return
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
cg
,
cg
,
ds
,
st
,
pad
)
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
...
@@ -1548,6 +1590,7 @@ def local_logsoftmax_to_dnn(node, ctx_name):
...
@@ -1548,6 +1590,7 @@ def local_logsoftmax_to_dnn(node, ctx_name):
class
NoCuDNNRaise
(
Optimizer
):
class
NoCuDNNRaise
(
Optimizer
):
def
apply
(
self
,
fgraph
):
def
apply
(
self
,
fgraph
):
"""
"""
Raise a error if cudnn can't be used.
Raise a error if cudnn can't be used.
...
...
theano/sandbox/gpuarray/dnn_pool.c
浏览文件 @
7ddf071c
...
@@ -2,12 +2,15 @@
...
@@ -2,12 +2,15 @@
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
input
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
input
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
output
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
output
);
cudnnPoolingDescriptor_t
APPLY_SPECIFIC
(
pool
);
#section init_code_struct
#section init_code_struct
cudnnStatus_t
APPLY_SPECIFIC
(
err
);
cudnnStatus_t
APPLY_SPECIFIC
(
err
);
APPLY_SPECIFIC
(
input
)
=
NULL
;
APPLY_SPECIFIC
(
input
)
=
NULL
;
APPLY_SPECIFIC
(
output
)
=
NULL
;
APPLY_SPECIFIC
(
output
)
=
NULL
;
APPLY_SPECIFIC
(
pool
)
=
NULL
;
if
((
APPLY_SPECIFIC
(
err
)
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
input
)))
!=
CUDNN_STATUS_SUCCESS
)
{
if
((
APPLY_SPECIFIC
(
err
)
=
cudnnCreateTensorDescriptor
(
&
APPLY_SPECIFIC
(
input
)))
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"could not allocate tensor descriptor "
PyErr_Format
(
PyExc_MemoryError
,
"could not allocate tensor descriptor "
...
@@ -19,16 +22,25 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output)))
...
@@ -19,16 +22,25 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output)))
"(out): %s"
,
cudnnGetErrorString
(
APPLY_SPECIFIC
(
err
)));
"(out): %s"
,
cudnnGetErrorString
(
APPLY_SPECIFIC
(
err
)));
FAIL
;
FAIL
;
}
}
if
((
APPLY_SPECIFIC
(
err
)
=
cudnnCreatePoolingDescriptor
(
&
APPLY_SPECIFIC
(
pool
)))
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"could not allocate pooling descriptor"
"(pool): %s"
,
cudnnGetErrorString
(
APPLY_SPECIFIC
(
err
)));
FAIL
;
}
#section cleanup_code_struct
#section cleanup_code_struct
if
(
APPLY_SPECIFIC
(
input
)
!=
NULL
)
{
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
input
));
}
if
(
APPLY_SPECIFIC
(
input
)
!=
NULL
)
{
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
input
));
}
if
(
APPLY_SPECIFIC
(
output
)
!=
NULL
)
{
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
output
));
}
if
(
APPLY_SPECIFIC
(
output
)
!=
NULL
)
{
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
output
));
}
if
(
APPLY_SPECIFIC
(
pool
)
!=
NULL
)
{
cudnnDestroyPoolingDescriptor
(
APPLY_SPECIFIC
(
pool
));
}
#section support_code_struct
#section support_code_struct
int
APPLY_SPECIFIC
(
dnn_pool
)(
PyGpuArrayObject
*
img
,
int
APPLY_SPECIFIC
(
dnn_pool
)(
PyGpuArrayObject
*
img
,
cudnnPoolingDescriptor_t
desc
,
PyArrayObject
*
ws
,
PyArrayObject
*
stride
,
PyArrayObject
*
pad
,
PyGpuArrayObject
**
out
,
PyGpuArrayObject
**
out
,
PyGpuContextObject
*
c
)
{
PyGpuContextObject
*
c
)
{
cudnnStatus_t
err
;
cudnnStatus_t
err
;
...
@@ -46,14 +58,21 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
...
@@ -46,14 +58,21 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
int
w
[
3
];
int
w
[
3
];
int
p
[
3
];
int
p
[
3
];
int
s
[
3
];
int
s
[
3
];
int
ndims
;
int
ndims
=
PyArray_DIM
(
ws
,
0
);
//PyGpuArray_NDIM(img) - 2;
for
(
int
i
=
0
;
i
<
ndims
;
i
++
)
{
w
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
ws
,
i
));
}
for
(
int
i
=
0
;
i
<
ndims
;
i
++
)
{
p
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
pad
,
i
));
}
for
(
int
i
=
0
;
i
<
ndims
;
i
++
)
{
s
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
stride
,
i
));
}
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
ndims
,
w
,
p
,
s
);
err
=
cudnnGetPoolingNdDescriptor
(
desc
,
3
,
&
mode
,
&
ndims
,
w
,
p
,
s
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"could not set op descriptor %s"
,
cudnnGetErrorString
(
err
));
"error doing cudnnGetPoolingDescriptor operation: %s"
,
cudnnGetErrorString
(
err
));
return
1
;
}
}
dims
[
0
]
=
PyGpuArray_DIM
(
img
,
0
);
dims
[
0
]
=
PyGpuArray_DIM
(
img
,
0
);
...
@@ -98,7 +117,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
...
@@ -98,7 +117,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
cuda_wait
((
*
out
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
out
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnPoolingForward
(
err
=
cudnnPoolingForward
(
APPLY_SPECIFIC
(
_handle
),
desc
,
APPLY_SPECIFIC
(
_handle
),
APPLY_SPECIFIC
(
pool
)
,
alpha
,
alpha
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
img
),
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
img
),
beta
,
beta
,
...
...
theano/sandbox/gpuarray/dnn_pool_grad.c
浏览文件 @
7ddf071c
...
@@ -4,6 +4,7 @@ cudnnTensorDescriptor_t APPLY_SPECIFIC(input);
...
@@ -4,6 +4,7 @@ cudnnTensorDescriptor_t APPLY_SPECIFIC(input);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
input_grad
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
input_grad
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
output
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
output
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
output_grad
);
cudnnTensorDescriptor_t
APPLY_SPECIFIC
(
output_grad
);
cudnnPoolingDescriptor_t
APPLY_SPECIFIC
(
pool
);
#section init_code_struct
#section init_code_struct
...
@@ -11,6 +12,7 @@ APPLY_SPECIFIC(input) = NULL;
...
@@ -11,6 +12,7 @@ APPLY_SPECIFIC(input) = NULL;
APPLY_SPECIFIC
(
input_grad
)
=
NULL
;
APPLY_SPECIFIC
(
input_grad
)
=
NULL
;
APPLY_SPECIFIC
(
output
)
=
NULL
;
APPLY_SPECIFIC
(
output
)
=
NULL
;
APPLY_SPECIFIC
(
output_grad
)
=
NULL
;
APPLY_SPECIFIC
(
output_grad
)
=
NULL
;
APPLY_SPECIFIC
(
pool
)
=
NULL
;
{
{
cudnnStatus_t
err
;
cudnnStatus_t
err
;
...
@@ -38,6 +40,11 @@ APPLY_SPECIFIC(output_grad) = NULL;
...
@@ -38,6 +40,11 @@ APPLY_SPECIFIC(output_grad) = NULL;
cudnnGetErrorString
(
err
));
cudnnGetErrorString
(
err
));
FAIL
;
FAIL
;
}
}
if
((
err
=
cudnnCreatePoolingDescriptor
(
&
APPLY_SPECIFIC
(
pool
)))
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_MemoryError
,
"could not allocate pooling descriptor"
"(pool): %s"
,
cudnnGetErrorString
(
err
));
FAIL
;
}
}
}
#section cleanup_code_struct
#section cleanup_code_struct
...
@@ -46,13 +53,16 @@ if (APPLY_SPECIFIC(input) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC
...
@@ -46,13 +53,16 @@ if (APPLY_SPECIFIC(input) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC
if
(
APPLY_SPECIFIC
(
input_grad
)
!=
NULL
)
{
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
input_grad
));
}
if
(
APPLY_SPECIFIC
(
input_grad
)
!=
NULL
)
{
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
input_grad
));
}
if
(
APPLY_SPECIFIC
(
output
)
!=
NULL
)
{
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
output
));
}
if
(
APPLY_SPECIFIC
(
output
)
!=
NULL
)
{
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
output
));
}
if
(
APPLY_SPECIFIC
(
output_grad
)
!=
NULL
)
{
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
output_grad
));
}
if
(
APPLY_SPECIFIC
(
output_grad
)
!=
NULL
)
{
cudnnDestroyTensorDescriptor
(
APPLY_SPECIFIC
(
output_grad
));
}
if
(
APPLY_SPECIFIC
(
pool
)
!=
NULL
)
{
cudnnDestroyPoolingDescriptor
(
APPLY_SPECIFIC
(
pool
));
}
#section support_code_struct
#section support_code_struct
int
APPLY_SPECIFIC
(
dnn_pool_grad
)(
PyGpuArrayObject
*
inp
,
int
APPLY_SPECIFIC
(
dnn_pool_grad
)(
PyGpuArrayObject
*
inp
,
PyGpuArrayObject
*
out
,
PyGpuArrayObject
*
out
,
PyGpuArrayObject
*
out_grad
,
PyGpuArrayObject
*
out_grad
,
cudnnPoolingDescriptor_t
desc
,
PyArrayObject
*
ws
,
PyArrayObject
*
stride
,
PyArrayObject
*
pad
,
PyGpuArrayObject
**
inp_grad
,
PyGpuArrayObject
**
inp_grad
,
PyGpuContextObject
*
c
)
{
PyGpuContextObject
*
c
)
{
cudnnStatus_t
err
;
cudnnStatus_t
err
;
...
@@ -85,6 +95,26 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
...
@@ -85,6 +95,26 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
return
1
;
return
1
;
}
}
int
w
[
3
];
int
p
[
3
];
int
s
[
3
];
int
ndims
=
PyArray_DIM
(
ws
,
0
);
//PyGpuArray_NDIM(img) - 2;
for
(
int
i
=
0
;
i
<
ndims
;
i
++
)
{
w
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
ws
,
i
));
}
for
(
int
i
=
0
;
i
<
ndims
;
i
++
)
{
p
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
pad
,
i
));
}
for
(
int
i
=
0
;
i
<
ndims
;
i
++
)
{
s
[
i
]
=
*
((
npy_intp
*
)
PyArray_GETPTR1
(
stride
,
i
));
}
err
=
cudnnSetPoolingNdDescriptor
(
APPLY_SPECIFIC
(
pool
),
MODE_FLAG
,
ndims
,
w
,
p
,
s
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"could not set op descriptor %s"
,
cudnnGetErrorString
(
err
));
}
if
(
c_set_tensorNd
(
*
inp_grad
,
APPLY_SPECIFIC
(
input_grad
))
!=
0
)
if
(
c_set_tensorNd
(
*
inp_grad
,
APPLY_SPECIFIC
(
input_grad
))
!=
0
)
return
1
;
return
1
;
...
@@ -118,7 +148,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
...
@@ -118,7 +148,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
cuda_wait
((
*
inp_grad
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
inp_grad
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnPoolingBackward
(
err
=
cudnnPoolingBackward
(
APPLY_SPECIFIC
(
_handle
),
desc
,
APPLY_SPECIFIC
(
_handle
),
APPLY_SPECIFIC
(
pool
)
,
alpha
,
alpha
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
out
),
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
out
),
APPLY_SPECIFIC
(
output_grad
),
PyGpuArray_DEV_DATA
(
out_grad
),
APPLY_SPECIFIC
(
output_grad
),
PyGpuArray_DEV_DATA
(
out_grad
),
...
...
theano/sandbox/gpuarray/tests/test_dnn.py
浏览文件 @
7ddf071c
...
@@ -275,6 +275,55 @@ def test_pooling():
...
@@ -275,6 +275,55 @@ def test_pooling():
utt
.
assert_allclose
(
c_out
,
g_out
)
utt
.
assert_allclose
(
c_out
,
g_out
)
def
test_pooling_with_tensor_vars
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
x
=
T
.
ftensor4
()
ws
=
theano
.
shared
(
numpy
.
array
([
2
,
2
],
dtype
=
'int32'
))
st
=
theano
.
shared
(
numpy
.
array
([
1
,
1
],
dtype
=
'int32'
))
pad
=
theano
.
shared
(
numpy
.
array
([
0
,
0
],
dtype
=
'int32'
))
mode
=
'max'
def
fn
(
x
):
dnn_op
=
dnn
.
dnn_pool
(
x
,
ws
=
ws
,
stride
=
st
,
pad
=
pad
,
mode
=
mode
)
return
dnn_op
for
shp
in
[(
1
,
1
,
2
,
2
),
(
1
,
1
,
3
,
3
)]:
data
=
numpy
.
random
.
normal
(
0
,
1
,
shp
)
.
astype
(
"float32"
)
*
10
theano
.
tests
.
unittest_tools
.
verify_grad
(
fn
,
[
data
],
cast_to_output_type
=
False
,
mode
=
mode_with_gpu
)
out2
=
pool_2d_i2n
(
x
,
ds
=
(
2
,
2
),
strides
=
(
1
,
1
),
pad
=
(
0
,
0
),
pool_function
=
T
.
max
)
mode_without_gpu2
=
mode_without_gpu
.
including
()
mode_without_gpu2
.
check_isfinite
=
False
f1
=
theano
.
function
([
x
],
fn
(
x
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnPool
)
for
node
in
f1
.
maker
.
fgraph
.
apply_nodes
])
f2
=
theano
.
function
([
x
],
out2
,
mode
=
mode_without_gpu2
)
assert
not
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnPool
)
for
node
in
f2
.
maker
.
fgraph
.
apply_nodes
])
for
shp
in
[(
1
,
10
,
100
,
100
),
(
1
,
3
,
99
,
99
),
(
32
,
1
,
147
,
197
),
]:
data
=
numpy
.
random
.
normal
(
0
,
1
,
shp
)
.
astype
(
"float32"
)
a
=
f1
(
data
)
.
__array__
()
b
=
f2
(
data
)
.
__array__
()
utt
.
assert_allclose
(
a
,
b
)
def
test_pooling_opt
():
def
test_pooling_opt
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
...
@@ -340,6 +389,7 @@ def test_dnn_tag():
...
@@ -340,6 +389,7 @@ def test_dnn_tag():
class
TestDnnInferShapes
(
utt
.
InferShapeTester
):
class
TestDnnInferShapes
(
utt
.
InferShapeTester
):
def
setUp
(
self
):
def
setUp
(
self
):
super
(
TestDnnInferShapes
,
self
)
.
setUp
()
super
(
TestDnnInferShapes
,
self
)
.
setUp
()
self
.
mode
=
mode_with_gpu
self
.
mode
=
mode_with_gpu
...
@@ -525,14 +575,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -525,14 +575,9 @@ class TestDnnInferShapes(utt.InferShapeTester):
[(
1
,
1
),
(
2
,
2
),
(
3
,
3
)],
[(
1
,
1
),
(
2
,
2
),
(
3
,
3
)],
modes
modes
):
):
desc
=
dnn
.
GpuDnnPoolDesc
(
ws
=
params
[
0
],
stride
=
params
[
1
],
mode
=
params
[
2
]
)()
self
.
_compile_and_check
(
self
.
_compile_and_check
(
[
img
],
[
img
],
[
dnn
.
GpuDnnPool
(
)(
img
,
desc
)],
[
dnn
.
GpuDnnPool
(
mode
=
params
[
2
])(
img
,
params
[
0
],
params
[
1
],
(
0
,
0
)
)],
[
img_val
],
[
img_val
],
dnn
.
GpuDnnPool
dnn
.
GpuDnnPool
)
)
...
@@ -561,16 +606,13 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -561,16 +606,13 @@ class TestDnnInferShapes(utt.InferShapeTester):
[(
1
,
1
),
(
2
,
2
),
(
3
,
3
)],
[(
1
,
1
),
(
2
,
2
),
(
3
,
3
)],
[
'max'
,
'average_inc_pad'
]
[
'max'
,
'average_inc_pad'
]
):
):
desc
=
dnn
.
GpuDnnPoolDesc
(
pool_grad
=
dnn
.
GpuDnnPoolGrad
(
mode
=
params
[
2
])(
ws
=
params
[
0
],
stride
=
params
[
1
],
mode
=
params
[
2
]
)()
pool_grad
=
dnn
.
GpuDnnPoolGrad
()(
img
,
img
,
out
,
out
,
img_grad
,
img_grad
,
desc
params
[
0
],
params
[
1
],
(
0
,
0
)
)
)
self
.
_compile_and_check
(
self
.
_compile_and_check
(
[
img
,
img_grad
,
out
],
[
img
,
img_grad
,
out
],
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论