Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
36437aca
提交
36437aca
authored
11月 24, 2014
作者:
abergeron
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2281 from nouiz/dnn_pool_grad
Enable again dnn pool grad after fixed it.
上级
f3a80dcb
44be801d
隐藏空白字符变更
内嵌
并排
正在显示
5 个修改的文件
包含
128 行增加
和
55 行删除
+128
-55
__init__.py
theano/__init__.py
+10
-9
link.py
theano/gof/link.py
+3
-3
basic_ops.py
theano/sandbox/cuda/basic_ops.py
+2
-1
dnn.py
theano/sandbox/cuda/dnn.py
+38
-26
test_dnn.py
theano/sandbox/cuda/tests/test_dnn.py
+75
-16
没有找到文件。
theano/__init__.py
浏览文件 @
36437aca
...
...
@@ -81,6 +81,16 @@ from theano.updates import Updates, OrderedUpdates
from
theano.gradient
import
Rop
,
Lop
,
grad
,
subgraph_grad
# This needs to be before the init of the GPU, as it adds a config variable
# needed during that phase.
import
theano.tests
if
hasattr
(
theano
.
tests
,
"TheanoNoseTester"
):
test
=
theano
.
tests
.
TheanoNoseTester
()
.
test
else
:
def
test
():
raise
ImportError
(
"The nose module is not installed."
" It is needed for Theano tests."
)
if
config
.
device
.
startswith
(
'gpu'
)
or
config
.
init_gpu_device
.
startswith
(
'gpu'
):
import
theano.sandbox.cuda
# We can't test the driver during import of theano.sandbox.cuda as
...
...
@@ -185,15 +195,6 @@ def sparse_grad(var):
ret
=
var
.
owner
.
op
.
__class__
(
sparse_grad
=
True
)(
*
var
.
owner
.
inputs
)
return
ret
import
theano.tests
if
hasattr
(
theano
.
tests
,
"TheanoNoseTester"
):
test
=
theano
.
tests
.
TheanoNoseTester
()
.
test
else
:
def
test
():
raise
ImportError
(
"The nose module is not installed."
" It is needed for Theano tests."
)
# This cannot be done in tensor/__init__.py due to a circular dependency -- randomstreams
# depends on raw_random which depends on tensor. As a work-around, we import RandomStreams
# here and inject an instance in tensor.
...
...
theano/gof/link.py
浏览文件 @
36437aca
...
...
@@ -126,10 +126,10 @@ def raise_with_op(node, thunk=None, exc_info=None):
for
ipt
in
thunk
.
inputs
]
scalar_values
=
[]
for
ipt
in
thunk
.
inputs
:
if
getattr
(
ipt
[
0
],
"size"
,
-
1
)
==
1
:
if
getattr
(
ipt
[
0
],
"size"
,
-
1
)
<=
5
:
scalar_values
.
append
(
ipt
[
0
])
else
:
scalar_values
.
append
(
"not scalar"
)
scalar_values
.
append
(
"not shown"
)
else
:
shapes
=
"The thunk don't have an inputs attributes."
strides
=
"So we can't access the strides of inputs values"
...
...
@@ -137,7 +137,7 @@ def raise_with_op(node, thunk=None, exc_info=None):
detailed_err_msg
+=
(
"Inputs shapes:
%
s"
%
shapes
+
"
\n
Inputs strides:
%
s"
%
strides
+
"
\n
Inputs
scalar
values:
%
s
\n
"
%
scalar_values
)
"
\n
Inputs values:
%
s
\n
"
%
scalar_values
)
else
:
hints
.
append
(
"HINT: Use another linker then the c linker to"
...
...
theano/sandbox/cuda/basic_ops.py
浏览文件 @
36437aca
...
...
@@ -2345,7 +2345,8 @@ class GpuReshape(tensor.Reshape, GpuOp):
shp
=
shp_new
else
:
raise
ValueError
(
"total size of new array must be unchanged"
)
raise
ValueError
(
"total size of new array must be unchanged"
,
x
.
shape
,
shp
)
out
[
0
]
=
x
.
reshape
(
tuple
(
shp
))
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
36437aca
...
...
@@ -723,7 +723,8 @@ int wsX, wsY, strideX, strideY;
err
%(name)
s = cudnnGetPoolingDescriptor(
%(desc)
s, &mode, &wsX, &wsY, &strideX, &strideY);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error doing operation:
%%
s",
PyErr_Format(PyExc_RuntimeError,
"GpuDnnPool: error doing cudnnGetPoolingDescriptor operation:
%%
s",
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
...
...
@@ -747,7 +748,8 @@ _handle,
%(output_desc)
s, CudaNdarray_DEV_DATA(
%(out)
s)
);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error doing operation:
%%
s",
PyErr_Format(PyExc_RuntimeError,
"GpuDnnPool: error doing cudnnPoolingForward operation:
%%
s",
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
...
...
@@ -765,7 +767,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
out
=
self
(
img
,
desc
)
g_out
=
GpuDnnPoolGrad
()(
out
,
grad
,
img
,
desc
)
g_out
=
GpuDnnPoolGrad
()(
img
,
out
,
grad
,
desc
)
return
g_out
,
theano
.
gradient
.
DisconnectedType
()()
...
...
@@ -774,7 +776,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return
[[
1
],
[
0
]]
def
c_code_cache_version
(
self
):
return
(
2
,)
return
(
4
,)
class
GpuDnnPoolGrad
(
DnnBase
):
...
...
@@ -782,13 +784,13 @@ class GpuDnnPoolGrad(DnnBase):
The pooling gradient.
:param inp: the input of the pooling.
:param inp_grad: same size as out, but is the corresponding gradient information.
:param out: the output of the pooling in the forward.
:param inp_grad: same size as out, but is the corresponding gradient information.
:param desc: The pooling descriptor.
"""
__props__
=
()
def
make_node
(
self
,
inp
,
inp_grad
,
out
,
desc
):
def
make_node
(
self
,
inp
,
out
,
inp_grad
,
desc
):
inp
=
as_cuda_ndarray_variable
(
inp
)
if
inp
.
type
.
ndim
!=
4
:
raise
TypeError
(
'inp must be 4D tensor'
)
...
...
@@ -805,7 +807,7 @@ class GpuDnnPoolGrad(DnnBase):
or
desc
.
type
.
ctype
!=
'cudnnPoolingDescriptor_t'
:
raise
TypeError
(
'desc must be cudnnPoolingDescriptor_t'
)
return
Apply
(
self
,
[
inp
,
inp_grad
,
out
,
desc
],
return
Apply
(
self
,
[
inp
,
out
,
inp_grad
,
desc
],
[
inp
.
type
()])
def
c_support_code_struct
(
self
,
node
,
struct_id
):
...
...
@@ -824,22 +826,26 @@ input_grad%(id)d = NULL;
output
%(id)
d = NULL;
output_grad
%(id)
d = NULL;
if ((err
%(id)
d = cudnnCreateTensor4dDescriptor(&input
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
PyErr_Format(PyExc_MemoryError,
"GpuDnnPoolGrad: could not allocate tensor4d descriptor "
"(input):
%%
s", cudnnGetErrorString(err
%(id)
d));
%(fail)
s
}
if ((err
%(id)
d = cudnnCreateTensor4dDescriptor(&input_grad
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
PyErr_Format(PyExc_MemoryError,
"GpuDnnPoolGrad: could not allocate tensor4d descriptor "
"(input_grad):
%%
s", cudnnGetErrorString(err
%(id)
d));
%(fail)
s
}
if ((err
%(id)
d = cudnnCreateTensor4dDescriptor(&output
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
PyErr_Format(PyExc_MemoryError,
"GpuDnnPoolGrad: could not allocate tensor4d descriptor "
"(output):
%%
s", cudnnGetErrorString(err
%(id)
d));
%(fail)
s
}
if ((err
%(id)
d = cudnnCreateTensor4dDescriptor(&output_grad
%(id)
d)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
PyErr_Format(PyExc_MemoryError,
"GpuDnnPoolGrad: could not allocate tensor4d descriptor "
"(output_grad):
%%
s", cudnnGetErrorString(err
%(id)
d));
%(fail)
s
}
...
...
@@ -854,7 +860,10 @@ if (output_grad%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id)
"""
%
dict
(
id
=
struct_id
)
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
inp
,
inp_grad
,
out
,
desc
=
inputs
# Here the names out and inp are based on the cudnn definition,
# not the definition of this class.
# This makes it complicated.
out
,
inp
,
inp_grad
,
desc
=
inputs
out_grad
,
=
outputs
set_in
=
"
\n
"
.
join
([
...
...
@@ -873,23 +882,27 @@ if (output_grad%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id)
cudnnStatus_t err
%(name)
s;
if (!CudaNdarray_is_c_contiguous(
%(input)
s)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
PyErr_SetString(PyExc_ValueError,
"GpuDnnPoolGrad: Only contiguous inputs are supported.");
%(fail)
s
}
if (!CudaNdarray_is_c_contiguous(
%(input_grad)
s)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous input gradients are supported.");
PyErr_SetString(PyExc_ValueError,
"GpuDnnPoolGrad: Only contiguous input gradients are supported.");
%(fail)
s
}
if (!CudaNdarray_is_c_contiguous(
%(output)
s)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous outputs are supported.");
PyErr_SetString(PyExc_ValueError,
"GpuDnnPoolGrad: Only contiguous outputs are supported.");
%(fail)
s
}
%(set_in)
s
if (CudaNdarray_prep_output(&
%(output_grad)
s, 4, CudaNdarray_HOST_DIMS(
%(output)
s)) != 0)
if (CudaNdarray_prep_output(&
%(output_grad)
s, 4,
CudaNdarray_HOST_DIMS(
%(output)
s)) != 0)
{
%(fail)
s
}
...
...
@@ -905,7 +918,8 @@ _handle,
%(output_grad_desc)
s, CudaNdarray_DEV_DATA(
%(output_grad)
s)
);
if (err
%(name)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error doing operation:
%%
s",
PyErr_Format(PyExc_RuntimeError,
"GpuDnnPoolGrad: error doing operation:
%%
s",
cudnnGetErrorString(err
%(name)
s));
%(fail)
s
}
...
...
@@ -919,7 +933,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
output_grad_desc
=
"output_grad"
+
str
(
sub
[
'struct_id'
]))
def
c_code_cache_version
(
self
):
return
(
2
,)
return
(
4
,)
def
dnn_pool
(
img
,
ws
,
stride
=
(
1
,
1
),
mode
=
'max'
):
...
...
@@ -1193,36 +1207,34 @@ if True:
border_mode
=
border_mode
,
subsample
=
subsample
,
direction_hint
=
direction_hint
)]
# DISABLED as there is problems in the handling of borders
# @register_opt('cudnn')
@register_opt
(
'cudnn'
)
@local_optimizer
([
GpuDownsampleFactorMax
])
def
local_pool_dnn
(
node
):
if
not
dnn_available
():
return
if
isinstance
(
node
.
op
,
GpuDownsampleFactorMax
):
if
node
.
op
.
ignore_border
:
if
not
node
.
op
.
ignore_border
:
return
img
,
=
node
.
inputs
ds
=
node
.
op
.
ds
return
[
dnn_pool
(
gpu_contiguous
(
img
),
ds
,
ds
)]
# DISABLED as there is problems in the handling of borders
# @register_opt('cudnn')
@register_opt
(
'cudnn'
)
@local_optimizer
([
GpuDownsampleFactorMaxGrad
])
def
local_pool_dnn_grad
(
node
):
if
not
dnn_available
():
return
if
isinstance
(
node
.
op
,
GpuDownsampleFactorMaxGrad
):
if
node
.
op
.
ignore_border
:
if
not
node
.
op
.
ignore_border
:
return
inp
,
out
,
inp_grad
=
node
.
inputs
ds
=
node
.
op
.
ds
desc
=
GpuDnnPoolDesc
(
ws
=
ds
,
stride
=
ds
,
mode
=
"max"
)()
return
[
GpuDnnPoolGrad
()(
gpu_contiguous
(
inp
),
gpu_contiguous
(
out
),
gpu_contiguous
(
inp_grad
),
gpu_contiguous
(
out
),
desc
)]
desc
)]
@register_opt
(
'cudnn'
)
@local_optimizer
([
GpuSoftmax
])
...
...
theano/sandbox/cuda/tests/test_dnn.py
浏览文件 @
36437aca
...
...
@@ -11,6 +11,7 @@ import theano.tensor as T
import
theano.tests.unittest_tools
as
utt
from
theano.sandbox.neighbours
import
images2neibs
,
neibs2images
from
theano.tensor.signal.downsample
import
max_pool_2d
from
theano.tensor.signal.downsample
import
DownsampleFactorMaxGrad
# Skip test if cuda_ndarray is not available.
...
...
@@ -57,26 +58,84 @@ def test_pooling():
x
=
T
.
ftensor4
()
for
func
in
(
T
.
max
,
T
.
mean
):
for
ws
in
(
4
,
5
):
for
ws
in
(
2
,
4
,
5
):
for
stride
in
(
2
,
3
):
out1
=
cuda
.
dnn
.
dnn_pool
(
x
,
ws
=
(
ws
,
ws
),
stride
=
(
stride
,
stride
),
mode
=
'max'
if
func
is
T
.
max
else
"average"
)
if
stride
>
ws
:
continue
if
ws
==
stride
and
func
is
T
.
max
:
# We will check that the opt introduced it.
out1
=
max_pool_2d
(
x
,
(
ws
,
ws
),
ignore_border
=
True
)
else
:
out1
=
cuda
.
dnn
.
dnn_pool
(
x
,
ws
=
(
ws
,
ws
),
stride
=
(
stride
,
stride
),
mode
=
'max'
if
func
is
T
.
max
else
"average"
)
out2
=
pool_2d_i2n
(
x
,
ds
=
(
ws
,
ws
),
strides
=
(
stride
,
stride
),
pool_function
=
func
)
f1
=
theano
.
function
([
x
],
out1
,
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPool
)
for
node
in
f1
.
maker
.
fgraph
.
apply_nodes
])
f2
=
theano
.
function
([
x
],
out2
,
mode
=
mode_with_gpu
)
data
=
numpy
.
random
.
normal
(
0
,
1
,
(
1
,
10
,
100
,
100
))
.
astype
(
"float32"
)
a
=
f1
(
data
)
.
__array__
()
b
=
f2
(
data
)
.
__array__
()
assert
numpy
.
allclose
(
a
,
b
,
atol
=
numpy
.
finfo
(
numpy
.
float32
)
.
eps
)
assert
not
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPool
)
for
node
in
f2
.
maker
.
fgraph
.
apply_nodes
])
for
shp
in
[(
1
,
10
,
100
,
100
),
(
1
,
3
,
99
,
99
),
(
32
,
1
,
147
,
197
),
]:
data
=
numpy
.
random
.
normal
(
0
,
1
,
shp
)
.
astype
(
"float32"
)
a
=
f1
(
data
)
.
__array__
()
b
=
f2
(
data
)
.
__array__
()
assert
numpy
.
allclose
(
a
,
b
,
atol
=
numpy
.
finfo
(
numpy
.
float32
)
.
eps
)
# Test the grad
for
shp
in
[(
1
,
1
,
2
,
2
),
(
1
,
1
,
3
,
3
)]:
data
=
numpy
.
random
.
normal
(
0
,
1
,
shp
)
.
astype
(
"float32"
)
*
10
ws
=
2
strides
=
2
# This tests the CPU grad + opt + GPU implementation
def
fn
(
x
):
return
max_pool_2d
(
x
,
(
ws
,
ws
),
ignore_border
=
True
)
theano
.
tests
.
unittest_tools
.
verify_grad
(
fn
,
[
data
],
cast_to_output_type
=
False
,
mode
=
mode_with_gpu
)
# Confirm that the opt would have inserted it.
f
=
theano
.
function
([
x
],
theano
.
grad
(
fn
(
x
)
.
sum
(),
x
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPoolGrad
)
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
# Test the GPU grad + GPU implementation
def
fn
(
x
):
dnn_op
=
cuda
.
dnn
.
dnn_pool
(
x
,
ws
=
(
ws
,
ws
),
stride
=
(
stride
,
stride
),
mode
=
'max'
if
func
is
T
.
max
else
"average"
)
return
dnn_op
theano
.
tests
.
unittest_tools
.
verify_grad
(
fn
,
[
data
],
cast_to_output_type
=
False
,
mode
=
mode_with_gpu
)
# Confirm that we get the good op.
f
=
theano
.
function
([
x
],
theano
.
grad
(
fn
(
x
)
.
sum
(),
x
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPoolGrad
)
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
g_out
=
f
(
data
)
if
func
is
T
.
max
:
# Compare against the CPU result
out
=
max_pool_2d
(
x
,
(
ws
,
ws
),
ignore_border
=
True
)
f
=
theano
.
function
([
x
],
theano
.
grad
(
out
.
sum
(),
x
),
mode
=
mode_without_gpu
)
assert
any
([
isinstance
(
node
.
op
,
DownsampleFactorMaxGrad
)
for
node
in
f
.
maker
.
fgraph
.
toposort
()])
c_out
=
f
(
data
)
assert
numpy
.
allclose
(
c_out
,
g_out
)
def
test_pooling_opt
():
...
...
@@ -87,7 +146,7 @@ def test_pooling_opt():
f
=
theano
.
function
(
[
x
],
max_pool_2d
(
x
,
ds
=
(
2
,
2
)),
max_pool_2d
(
x
,
ds
=
(
2
,
2
)
,
ignore_border
=
True
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
n
.
op
,
cuda
.
dnn
.
GpuDnnPool
)
...
...
@@ -95,7 +154,7 @@ def test_pooling_opt():
f
=
theano
.
function
(
[
x
],
T
.
grad
(
max_pool_2d
(
x
,
ds
=
(
2
,
2
))
.
sum
(),
x
),
T
.
grad
(
max_pool_2d
(
x
,
ds
=
(
2
,
2
)
,
ignore_border
=
True
)
.
sum
(),
x
),
mode
=
mode_with_gpu
.
including
(
"cudnn"
))
assert
any
([
isinstance
(
n
.
op
,
cuda
.
dnn
.
GpuDnnPoolGrad
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论