testgroup / pytensor · Commits

Commit d3f52989, authored Jun 16, 2011 by Pascal Lamblin

Refine mechanism to test different kinds of pre-allocated output storage

Parent: 48a14134

Showing 1 changed file with 143 additions and 131 deletions:
theano/compile/debugmode.py (+143, -131)

theano/compile/debugmode.py @ d3f52989
@@ -11,7 +11,8 @@ from theano import gof
 from theano.gof import Env, graph, utils, link
 from theano.gof.link import raise_with_op
 from theano.gof.cc import CLinker
-from theano.configparser import config, AddConfigVar, IntParam, BoolParam
+from theano.configparser import (config, AddConfigVar, BoolParam, IntParam,
+        StrParam)
 from theano.compile.function_module import (FunctionMaker,
         Function,
         infer_reuse_pattern,
@@ -47,8 +48,13 @@ AddConfigVar('DebugMode.warn_input_not_reused',
         BoolParam(True))
 
 AddConfigVar('DebugMode.check_preallocated_output',
-        'Test thunks with pre-allocated memory as output storage.',
-        BoolParam(False))
+        ('Test thunks with pre-allocated memory as output storage. '
+         'This is a list of strings separated by ":". Valid values are: '
+         '"previous" (previously-returned memory), '
+         '"c_contiguous", "f_contiguous", '
+         '"neg_strides" (negative strides), and '
+         '"ALL" (all of the above).'),
+        StrParam(''))
 
 import logging
 _logger = logging.getLogger("theano.compile.debugmode")
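With this change, DebugMode.check_preallocated_output turns from a boolean into a colon-separated list of mode names, empty by default. A minimal sketch of enabling it, assuming Theano's standard THEANO_FLAGS / .theanorc configuration mechanism (the script name is hypothetical):

# In the shell, before running a hypothetical script:
#   THEANO_FLAGS='mode=DebugMode,DebugMode.check_preallocated_output=previous:c_contiguous' python my_script.py
#
# or the equivalent .theanorc entry:
#   [DebugMode]
#   check_preallocated_output = previous:c_contiguous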
@@ -819,6 +825,109 @@ def _find_bad_optimizations2(order, reasons, r_vals):
 _find_bad_optimizations = _find_bad_optimizations0
 
+
+def _check_preallocated_output(node, thunk, prealloc_modes, def_val,
+        storage_map, r_vals, dr_vals, perform, active_order_set):
+    '''Try to apply thunk() on different output storages'''
+
+    # To avoid circular imports
+    from theano.tensor import TensorType
+    from theano.sandbox.cuda import cuda_available, CudaNdarrayType
+    if cuda_available:
+        from theano.sandbox.cuda import CudaNdarray
+
+    # List of (name, map) pairs of the settings to test
+    prealloc_maps = []
+
+    # TODO: Sparse, Scalar
+    # TODO: wrong shape, more stride patterns
+
+    # reuse_output: use a copy of the same storage returned the first time
+    # TODO: optimization warning if the storage in reuse_outputs
+    # is not reused
+    if 'previous' in prealloc_modes or 'ALL' in prealloc_modes:
+        reuse_outputs = {}
+        for r in node.outputs:
+            # We want to reuse the exact same memory buffer,
+            # so we keep the copy in r_vals
+            new_r = _lessbroken_deepcopy(r_vals[r])
+            reuse_outputs[r] = r_vals[r]
+            r_vals[r] = new_r
+        prealloc_maps.append(('previous', reuse_outputs))
+
+    # c_cont_output: use a c-contiguous array
+    # (for TensorType and CudaNdarray, else None)
+    if 'c_contiguous' in prealloc_modes or 'ALL' in prealloc_modes:
+        c_cont_outputs = {}
+        for r in node.outputs:
+            if isinstance(r.type, (TensorType, CudaNdarrayType)):
+                # Build a C-contiguous buffer
+                new_buf = numpy.zeros(
+                        shape=r_vals[r].shape,
+                        dtype=r_vals[r].dtype,
+                        order='C')
+                new_buf += def_val
+                if isinstance(r.type, CudaNdarrayType):
+                    new_buf = CudaNdarray(new_buf)
+                c_cont_outputs[r] = new_buf
+        if len(c_cont_outputs):
+            prealloc_maps.append(('c_contiguous', c_cont_outputs))
+
+    # f_cont_output: use a fortran-contiguous ndarray
+    # (for TensorType only)
+    if 'f_contiguous' in prealloc_modes or 'ALL' in prealloc_modes:
+        f_cont_outputs = {}
+        for r in node.outputs:
+            if isinstance(r.type, TensorType):
+                new_buf = numpy.zeros(
+                        shape=r_vals[r].shape,
+                        dtype=r_vals[r].dtype,
+                        order='F')
+                new_buf += def_val
+                f_cont_outputs[r] = new_buf
+        if len(f_cont_outputs):
+            prealloc_maps.append(('f_contiguous', f_cont_outputs))
+
+    if 'neg_strides' in prealloc_modes:
+        raise NotImplementedError('Negative strides in check_preallocated_output')
+
+    for (name, out_map) in prealloc_maps:
+        # Copy the inputs over again
+        for r in node.inputs:
+            storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
+
+        # Get the appropriate output storages (no copy)
+        for r in node.outputs:
+            storage_map[r][0] = out_map.get(r, None)
+
+        thunk()
+
+        # Check outputs
+        for r in node.outputs:
+            if not r.type.is_valid_value(storage_map[r][0]):
+                raise InvalidValueError(r, storage_map[r][0],
+                        hint='%s with %s output' % (perform, name),
+                        specific_hint=r.type.value_validity_msg(
+                            storage_map[r][0]))
+
+        _check_inputs(node, storage_map, r_vals, dr_vals,
+                active_order_set,
+                clobber_dr_vals=False,
+                perform='%s with output %s' % (perform, name),
+                warn_input_not_reused=False)
+
+        _check_viewmap(node, storage_map)
+
+        for r in node.outputs:
+            if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]):
+                # TODO: indicate it is not a C/Py problem
+                raise BadCLinkerOutput(r, val_py=r_vals[r],
+                        val_c=storage_map[r][0])
+
+        # Clear storage_map
+        for r in node.outputs:
+            storage_map[r][0] = None
+
+
 class _EnvEvent(object):
     """A record of an event in the life of an Env.
@@ -1029,9 +1138,7 @@ class _Linker(gof.link.LocalLinker):
         #can't import at toplevel because of circular import
         # TODO: don't do this ugly hacky way of setting the filter_checks_isfinite
         from theano.tensor import TensorType #to set filter_check_isfinite
-        from theano.sandbox.cuda import cuda_available, CudaNdarrayType
-        if cuda_available:
-            from theano.sandbox.cuda import CudaNdarray
+        from theano import tests # for config.unittests.rseed
         env = self.env
         input_storage_ = input_storage
         output_storage_ = output_storage
@@ -1092,6 +1199,13 @@ class _Linker(gof.link.LocalLinker):
         else:
             no_recycling = [storage_map[r] for r in no_recycling if r not in env.inputs]
 
+        # Precompute some things for storage pre-allocation
+        prealloc_modes = config.DebugMode.check_preallocated_output.split(':')
+        try:
+            def_val = int(config.unittests.rseed)
+        except ValueError:
+            def_val = 666
+
         #####
         # This is the function that runs when you evaluate the graph
         #####
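A side note on the parsing added above: with the flag left at its StrParam default of '', split(':') yields [''] rather than [], so no mode name matches and the helper builds no storage maps; the int() fallback likewise covers non-numeric unittests.rseed values such as 'random'. A quick illustration:

>>> ''.split(':')
['']
>>> 'previous' in ''.split(':')
False
>>> 'previous:c_contiguous'.split(':')
['previous', 'c_contiguous']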
@@ -1133,14 +1247,8 @@ class _Linker(gof.link.LocalLinker):
                 storage_map[r][0] = None
                 r_vals_initialized.append(r)
 
-        # Debug Mode complains if someone provides memory buffers
-        # for the outputs (where the linker can choose to store the
-        # outputs). Since this is what scan does by default, we will
-        # delete the output_storage for now. This code is going to
-        # change when someone decides to go over the debug code
-        # again, and try to include checks for such behaviour as
-        # well.
+        # TODO: store them in another map, and test the thunks on
+        # them as output storages.
         for r in storage_map:
             if r in env.outputs:
                 storage_map[r][0] = None
@@ -1199,68 +1307,16 @@ class _Linker(gof.link.LocalLinker):
                     storage_map[r][0] = None #clear the storage_map of outputs for the thunk_c
 
                 if config.DebugMode.check_preallocated_output:
-                    ## Then, try to use different output storages
-                    # reuse_output: use a copy of the same storage returned the first time
-                    # TODO: optimization warning if the storage in reuse_outputs
-                    # is not reused
-                    # c_cont_output: use a c-continuous ndarray (for TensorType, else None)
-                    # f_cont_output: use a fortran-continuous ndarray (for TensorType, else None)
-                    # TODO: Sparse, Scalar
-                    # TODO: wrong shape, more stride patterns
-                    reuse_outputs = {}
-                    c_cont_outputs = {}
-                    f_cont_outputs = {}
-                    for r in node.outputs:
-                        r_val = r_vals[r]
-                        reuse_outputs[r] = _lessbroken_deepcopy(r_val)
-                        if isinstance(r.type, TensorType):
-                            c_cont_outputs[r] = numpy.empty(
-                                    shape=r_val.shape,
-                                    dtype=r_val.dtype,
-                                    order='C')
-                            f_cont_outputs[r] = numpy.empty(
-                                    shape=r_val.shape,
-                                    dtype=r_val.dtype,
-                                    order='F')
-                        elif isinstance(r.type, CudaNdarrayType):
-                            # CudaNdarray supports only C-contiguous
-                            c_cont_outputs[r] = CudaNdarray.zeros(r_val.shape)
-                    for out_map in (reuse_outputs, c_cont_outputs, f_cont_outputs):
-                        if len(out_map) == 0:
-                            # All storages are None, no need to test that again
-                            continue
-                        # Copy the inputs over again
-                        for r in node.inputs:
-                            storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
-                        # Copy the appropriate output storages
-                        for r in node.outputs:
-                            storage_map[r][0] = out_map.get(r, None)
-                        thunk_py()
-                        # Check outputs
-                        for r in node.outputs:
-                            if not r.type.is_valid_value(storage_map[r][0]):
-                                raise InvalidValueError(r, storage_map[r][0],
-                                        hint='perform output',
-                                        specific_hint=r.type.value_validity_msg(
-                                            storage_map[r][0]))
-                        _check_inputs(node, storage_map, r_vals, dr_vals,
-                                active_order_set,
-                                clobber_dr_vals=False, perform='py',
-                                warn_input_not_reused=False)
-                        _check_viewmap(node, storage_map)
-                        for r in node.outputs:
-                            if not r.type.values_eq_approx(r_vals[r],
-                                    storage_map[r][0]):
-                                # TODO: indicate it is not a C/Py problem
-                                raise BadCLinkerOutput(r, val_py=r_vals[r],
-                                        val_c=storage_map[r][0])
-                        # Clear storage_map
-                        for r in node.outputs:
-                            storage_map[r][0] = None
+                    _check_preallocated_output(
+                            node=node,
+                            thunk=thunk_py,
+                            prealloc_modes=prealloc_modes,
+                            def_val=def_val,
+                            storage_map=storage_map,
+                            r_vals=r_vals,
+                            dr_vals=dr_vals,
+                            perform='py',
+                            active_order_set=active_order_set)
 
                 # print >> sys.stderr, i, "DEBUGMODE thunk_py %100s %50s %30s" % (node,
                 #[(id(o), numpy.asarray(storage_map[o][0])[0,0]) for o in node.inputs],
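This hunk is the heart of the commit: the inline checking block for the Python thunk collapses into a single call to the shared helper, and the C-side block below gets the same treatment. The refactoring pattern in miniature, with hypothetical stand-ins for the two implementations:

def check_output(thunk, perform):
    # shared validation, parametrized by the callable under test
    result = thunk()
    assert result == 4, 'bad output from %s' % perform

def py_impl():                     # stand-in for thunk_py
    return 2 + 2

def c_impl():                      # stand-in for the wrapped thunk_c
    return 4

check_output(py_impl, perform='py')
check_output(c_impl, perform='c code')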
@@ -1332,65 +1388,21 @@ class _Linker(gof.link.LocalLinker):
                     storage_map[r][0] = None #clear the storage_map for the thunk_c
 
                 if config.DebugMode.check_preallocated_output:
-                    ## Then, try to use different output storages
-                    # TODO: factorize that code with the one for Python above
-                    reuse_outputs = {}
-                    c_cont_outputs = {}
-                    f_cont_outputs = {}
-                    for r in node.outputs:
-                        r_val = r_vals[r]
-                        reuse_outputs[r] = _lessbroken_deepcopy(r_val)
-                        if isinstance(r.type, TensorType):
-                            c_cont_outputs[r] = numpy.empty(
-                                    shape=r_val.shape,
-                                    dtype=r_val.dtype,
-                                    order='C')
-                            f_cont_outputs[r] = numpy.empty(
-                                    shape=r_val.shape,
-                                    dtype=r_val.dtype,
-                                    order='F')
-                    for out_map in (reuse_outputs, c_cont_outputs, f_cont_outputs):
-                        if len(out_map) == 0:
-                            # All storages are None, no need to test that again
-                            continue
-                        # Copy the inputs over again
-                        for r in node.inputs:
-                            storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
-                        # Copy the appropriate output storages
-                        for r in node.outputs:
-                            #storage_map[r][0] = out_map.get(r, None)
-                            if r in out_map:
-                                storage_map[r][0] = out_map[r]
-                            else:
-                                print 'not tensor?', r
-                        try:
-                            thunk_c()
-                        except:
-                            raise_with_op(node)
-                        # Check outputs
-                        for r in node.outputs:
-                            if not r.type.is_valid_value(storage_map[r][0]):
-                                raise InvalidValueError(r, storage_map[r][0],
-                                        hint='perform output',
-                                        specific_hint=r.type.value_validity_msg(
-                                            storage_map[r][0]))
-                        _check_inputs(node, storage_map, r_vals, dr_vals,
-                                active_order_set,
-                                clobber_dr_vals=False, perform='c',
-                                warn_input_not_reused=False)
-                        _check_viewmap(node, storage_map)
-                        for r in node.outputs:
-                            if not r.type.values_eq_approx(r_vals[r],
-                                    storage_map[r][0]):
-                                # TODO: indicate it is not a C/Py problem
-                                raise BadCLinkerOutput(r, val_py=r_vals[r],
-                                        val_c=storage_map[r][0])
-                        # Clear storage map
-                        for r in node.outputs:
-                            storage_map[r][0] = None
+                    def thunk():
+                        try:
+                            thunk_c()
+                        except:
+                            raise_with_op(node)
+
+                    _check_preallocated_output(
+                            node=node,
+                            thunk=thunk,
+                            prealloc_modes=prealloc_modes,
+                            def_val=def_val,
+                            storage_map=storage_map,
+                            r_vals=r_vals,
+                            dr_vals=dr_vals,
+                            perform='c code',
+                            active_order_set=active_order_set)
 
                 # print >> sys.stderr, i, "DEBUGMODE thunk_c %100s %50s %30s" % (node,
                 #[(id(o), numpy.asarray(storage_map[o][0])[0,0]) for o in node.inputs],
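_check_preallocated_output expects a bare callable, so the C case above wraps thunk_c in a small thunk() that preserves the raise_with_op error annotation. A self-contained sketch of that wrapper pattern, with simplified stand-ins for thunk_c and raise_with_op (the real raise_with_op lives in theano.gof.link):

import sys

def thunk_c():
    # stand-in for a failing C thunk
    raise ValueError('boom')

def raise_with_op(node):
    # simplified: annotate the in-flight exception with the node, re-raise
    exc_value = sys.exc_info()[1]
    exc_value.args = exc_value.args + ('apply node: %s' % node,)
    raise

def thunk():
    # the wrapper introduced in the diff, with a hypothetical node label
    try:
        thunk_c()
    except:
        raise_with_op('Elemwise{add}(x, y)')

Calling thunk() then raises ValueError('boom', 'apply node: Elemwise{add}(x, y)'), so the failing apply node stays visible in the traceback.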