testgroup / pytensor · Commits

Commit be9316f0
Authored Apr 03, 2012 by Pascal Lamblin
Do not test preallocated output for inplace outs
Also, do not return a preallocated map if it is empty, and fill "previous" storage with the default value.
Parent: 4c2d9e04

Showing 1 changed file with 87 additions and 39 deletions:

theano/compile/debugmode.py (+87, -39)
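The change below keys everything on whether an output "actually worked inplace", i.e. whether its storage is aliased to the storage of at least one input. As a minimal standalone sketch (not part of the commit; the helper name ran_inplace is hypothetical), the test boils down to numpy.may_share_memory:

    import numpy

    def ran_inplace(out_buf, in_bufs):
        # An output "actually worked inplace" when its value is aliased
        # to the value of at least one input.
        return any(numpy.may_share_memory(out_buf, b) for b in in_bufs)

    x = numpy.ones(4)
    print(ran_inplace(x, [x]))         # True: same buffer, skip prealloc tests
    print(ran_inplace(x.copy(), [x]))  # False: fresh storage, test as usual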
@@ -670,18 +670,27 @@ def _optcheck_env(input_specs, output_specs, accept_inplace=False):
 def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
                   clobber_dr_vals=True,
                   perform=None, warn_input_not_reused=True):
-    """Raise BadDestroyMap if necessary, update dr_vals"""
+    """
+    Raise BadDestroyMap if necessary, update dr_vals
+
+    Returns a list of output variables that actually worked inplace
+    (their value is aliased to the value of at least one input).
+    """
     destroyed_idx_list = []
     destroy_map = getattr(node.op, 'destroy_map', {})
     for o_pos, i_pos_list in destroy_map.iteritems():
         destroyed_idx_list.extend(i_pos_list)
     destroyed_res_list = [node.inputs[i] for i in destroyed_idx_list]
 
-    if warn_input_not_reused and destroyed_res_list:
-        dmap = getattr(node.op, 'destroy_map', {})
-        for oo, ii in dmap.iteritems():
-            out_var = storage_map[node.outputs[oo]][0]
-            in_var = storage_map[node.inputs[ii[0]]][0]
+    actually_inplace_outputs = []
+    dmap = getattr(node.op, 'destroy_map', {})
+    for oo, ii in dmap.iteritems():
+        out_var = storage_map[node.outputs[oo]][0]
+        in_var = storage_map[node.inputs[ii[0]]][0]
+        if _may_share_memory(out_var, in_var):
+            actually_inplace_outputs.append(node.outputs[oo])
+
+        if warn_input_not_reused and destroyed_res_list:
             if isinstance(node.op, theano.compile.mode.OutputGuard):
                 # The point of OutputGuard is to be declared as destructive
                 # while not destroying anything
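To see what the new loop above collects, here is a toy walkthrough under an assumed destroy_map of {0: [0]} (output 0 overwrites input 0); the lists stand in for DebugMode's storage_map cells:

    import numpy

    in_buf = numpy.arange(3.0)
    inputs = [in_buf]
    outputs = [in_buf]          # the op reused the input's storage
    destroy_map = {0: [0]}

    actually_inplace_outputs = []
    for oo, ii in destroy_map.items():   # .iteritems() in the Python 2 original
        out_var, in_var = outputs[oo], inputs[ii[0]]
        if numpy.may_share_memory(out_var, in_var):
            actually_inplace_outputs.append(oo)

    print(actually_inplace_outputs)      # [0]: this output worked inplace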
@@ -691,11 +700,14 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
                     "as destroyed was not changed for node '%s'",
                     ii[0], str(node))
 
-    if warn_input_not_reused:
-        vmap = getattr(node.op, 'view_map', {})
-        for oo, ii in vmap.iteritems():
-            out_var = storage_map[node.outputs[oo]][0]
-            in_var = storage_map[node.inputs[ii[0]]][0]
+    vmap = getattr(node.op, 'view_map', {})
+    for oo, ii in vmap.iteritems():
+        out_var = storage_map[node.outputs[oo]][0]
+        in_var = storage_map[node.inputs[ii[0]]][0]
+        if _may_share_memory(out_var, in_var):
+            actually_inplace_outputs.append(node.outputs[oo])
+
+        if warn_input_not_reused:
             # We don't try to optimize simple scalar and empty ndarray,
             # as this is not worth our time. This happen at least in
             # Subtensor when the output is a scalar But this depend on
@@ -727,6 +739,8 @@ def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
                 raise BadDestroyMap(node, r_idx, r_vals[r],
                                     storage_map[r][0], perform)
 
+    return actually_inplace_outputs
+
 
 def _check_viewmap(node, storage_map):
     """
@@ -994,7 +1008,8 @@ _find_bad_optimizations = _find_bad_optimizations0
 
 def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
-                           storage_map, r_vals, dr_vals, perform, active_order_set):
+                           storage_map, r_vals, dr_vals, perform,
+                           active_order_set, inplace_outs):
     '''Preallocate outputs in different memory layouts'''
     # To avoid circular imports
@@ -1006,20 +1021,37 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
     # TODO: Sparse? Scalar does not really make sense.
 
+    # Do not preallocate memory for outputs that actually work inplace
+    considered_outputs = []
+    for r in node.outputs:
+        if r not in inplace_outs:
+            considered_outputs.append(r)
+
     # reuse_output: use a copy of the same storage returned the first time
     # TODO: optimization warning if the storage in reuse_outputs
     # is not reused
-    # TODO: skip all this for outputs that actually worked inplace
     if 'previous' in prealloc_modes or 'ALL' in prealloc_modes:
         reuse_outputs = {}
-        for r in node.outputs:
+        for r in considered_outputs:
             # We want to reuse the exact same memory buffer,
             # so we keep the copy in r_vals
             new_r = _lessbroken_deepcopy(r_vals[r])
             reuse_outputs[r] = r_vals[r]
             r_vals[r] = new_r
+            # Sometimes, outputs can be aliased together.
+            # I'm not sure why it is legitimate, but there are tests about it.
+            # So, we cannot fill r_vals[r] with def_val yet, we have to wait
+            # until all output values are deepcopied.
+
+        for r in considered_outputs:
+            # There is no risk to overwrite inputs, since r does not work
+            # inplace.
+            if isinstance(r.type, (TensorType, CudaNdarrayType)):
+                reuse_outputs[r][...] = numpy.asarray(
+                        def_val).astype(r.type.dtype)
 
-        yield ('previous', reuse_outputs)
+        if reuse_outputs:
+            yield ('previous', reuse_outputs)
         # clear memory that is not needed any more
         del reuse_outputs
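The def_val fill added above uses ellipsis assignment, which overwrites a buffer's contents without rebinding the name, so the storage kept in reuse_outputs and the freshly deep-copied r_vals entry remain distinct objects. A standalone sketch, with an arbitrary sentinel value:

    import numpy

    def_val = -1.5          # arbitrary sentinel for the sketch
    buf = numpy.empty(4, dtype='float32')
    alias = buf             # stands in for the entry kept in reuse_outputs

    buf[...] = numpy.asarray(def_val).astype(buf.dtype)  # in-place fill

    print(alias)            # [-1.5 -1.5 -1.5 -1.5]: the alias sees the fill
    print(alias is buf)     # True: same storage object, contents replaced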
@@ -1027,7 +1059,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
     # (for TensorType and CudaNdarray, else None)
     if 'c_contiguous' in prealloc_modes or 'ALL' in prealloc_modes:
         c_cont_outputs = {}
-        for r in node.outputs:
+        for r in considered_outputs:
             if isinstance(r.type, (TensorType, CudaNdarrayType)):
                 # Build a C-contiguous buffer
                 new_buf = r.type.value_zeros(r_vals[r].shape)
@@ -1045,7 +1077,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
     # (for TensorType, only)
     if 'f_contiguous' in prealloc_modes or 'ALL' in prealloc_modes:
         f_cont_outputs = {}
-        for r in node.outputs:
+        for r in considered_outputs:
             if isinstance(r.type, (TensorType, CudaNdarrayType)):
                 new_buf = numpy.zeros(
                         shape=r_vals[r].shape,
@@ -1089,7 +1121,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
     if 'strided' in prealloc_modes or 'ALL' in prealloc_modes:
         # Initial allocation
         init_strided = {}
-        for r in node.outputs:
+        for r in considered_outputs:
             if isinstance(r.type, (TensorType, CudaNdarrayType)):
                 # Create a buffer twice as large in every dimension,
                 # except if broadcastable, or for dimensions above 4
@@ -1149,7 +1181,8 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
                 r_buf[...] = numpy.asarray(def_val).astype(r_buf.dtype)
                 strided[r] = r_buf
 
-        yield (name, strided)
+        if strided:
+            yield (name, strided)
         del strided
 
     if 'wrong_size' in prealloc_modes or 'ALL' in prealloc_modes:
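This hunk, like the 'previous' and 'wrong_size' ones, now only yields a mode when its map is non-empty, which matters once inplace outputs are filtered out: a map can end up empty, and re-running the thunk for it would be wasted work. A minimal sketch of that generator contract (all names hypothetical):

    def preallocated_maps(outputs, inplace_outs):
        # Only outputs that did not work inplace get preallocated storage.
        considered = [r for r in outputs if r not in inplace_outs]
        strided = dict((r, 'buf_%s' % r) for r in considered)
        if strided:                    # skip the mode when nothing is left
            yield ('strided', strided)

    print(list(preallocated_maps(['o0'], inplace_outs=['o0'])))        # []
    print(list(preallocated_maps(['o0', 'o1'], inplace_outs=['o0'])))
    # [('strided', {'o1': 'buf_o1'})]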
@@ -1166,7 +1199,7 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
         wrong_size = {}
         name = 'wrong_size%s' % str(tuple(shape_diff))
 
-        for r in node.outputs:
+        for r in considered_outputs:
             if isinstance(r.type, (TensorType, CudaNdarrayType)):
                 r_shape_diff = shape_diff[:r.ndim]
                 out_shape = [max((s + sd), 0)
@@ -1177,12 +1210,14 @@ def _get_preallocated_maps(node, thunk, prealloc_modes, def_val,
                             def_val).astype(r.type.dtype)
                 wrong_size[r] = new_buf
 
-        yield (name, wrong_size)
+        if wrong_size:
+            yield (name, wrong_size)
         del wrong_size
 
 
 def _check_preallocated_output(node, thunk, prealloc_modes, def_val,
-                               storage_map, r_vals, dr_vals, perform, active_order_set):
+                               storage_map, r_vals, dr_vals, perform,
+                               active_order_set, inplace_outs):
     '''Try to apply thunk() on different output storages'''
 
     # If node has an inner compiled Theano function with mode DebugMode,
@@ -1211,20 +1246,30 @@ def _check_preallocated_output(node, thunk, prealloc_modes, def_val,
             changed_inner_mode = True
             _logger.info('changing inner mode')
 
+    # Set of inputs that are marked as destroyed or viewed
+    aliased_inputs = set()
+    dmap = getattr(node.op, 'destroy_map', {})
+    vmap = getattr(node.op, 'view_map', {})
+    for i, r in enumerate(node.inputs):
+        if any(i in v for v in (dmap.values() + vmap.values())):
+            aliased_inputs.add(r)
+
     _logger.debug('starting preallocated output checking')
     for (name, out_map) in _get_preallocated_maps(
             node, thunk, prealloc_modes, def_val, storage_map, r_vals,
-            dr_vals, perform, active_order_set):
+            dr_vals, perform, active_order_set, inplace_outs):
         _logger.debug('  name = %s', name)
 
+        if not out_map:
+            # Map is empty, there is no need to execute thunk() again
+            _logger.warn('%s: out_map is empty', name)
+            continue
+
         # Copy the inputs over, if they were marked as destroyed or viewed
         # (we will destroy the output at some point so it can destroy
         # the input)
-        dmap = getattr(node.op, 'destroy_map', {})
-        vmap = getattr(node.op, 'view_map', {})
-        for i, r in enumerate(node.inputs):
-            if any(i in v for v in (dmap.values() + vmap.values())):
-                storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
+        for r in aliased_inputs:
+            storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])
 
         # Get the appropriate output storages
         # (no copy)
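The hunk above hoists the destroy_map/view_map scan out of the per-mode loop: the set of aliased inputs is computed once, and only the deepcopy restore remains inside the loop. A sketch of the membership test (the original is Python 2, where dict.values() returns lists, hence the + concatenation; the list(...) calls below make the same idea work on Python 3 too):

    def aliased_input_positions(dmap, vmap, n_inputs):
        # An input position is aliased if any output destroys or views it.
        value_lists = list(dmap.values()) + list(vmap.values())
        return set(i for i in range(n_inputs)
                   if any(i in v for v in value_lists))

    # Assumed toy op: output 0 destroys input 1, output 1 views input 0.
    print(aliased_input_positions({0: [1]}, {1: [0]}, n_inputs=3))  # {0, 1}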
@@ -1724,11 +1769,11 @@ class _Linker(gof.link.LocalLinker):
                                     raise InvalidValueError(r, storage_map[r][0],
                                                             hint='perform output',
                                                             specific_hint=hint2)
-                        _check_inputs(node, storage_map, r_vals, dr_vals,
-                                      active_order_set,
-                                      clobber_dr_vals=True, perform='py',
-                                      warn_input_not_reused=config.DebugMode.warn_input_not_reused)
+                        py_inplace_outs = _check_inputs(
+                            node, storage_map, r_vals, dr_vals,
+                            active_order_set,
+                            clobber_dr_vals=True, perform='py',
+                            warn_input_not_reused=config.DebugMode.warn_input_not_reused)
                         _check_viewmap(node, storage_map)
@@ -1756,7 +1801,8 @@ class _Linker(gof.link.LocalLinker):
                                     r_vals=r_vals,
                                     dr_vals=dr_vals,
                                     perform='py',
-                                    active_order_set=active_order_set)
+                                    active_order_set=active_order_set,
+                                    inplace_outs=py_inplace_outs)
 
                         # print >> sys.stderr, i, "DEBUGMODE thunk_py %100s %50s %30s" % (node,
                         #[(id(o), numpy.asarray(storage_map[o][0])[0,0]) for o in node.inputs],
@@ -1805,10 +1851,11 @@ class _Linker(gof.link.LocalLinker):
                                     self.maker.mode.require_matching_strides,
                                     node.op)
 
-                        _check_inputs(node, storage_map, r_vals,
-                                      dr_vals, active_order_set,
-                                      clobber_dr_vals=clobber, perform='c',
-                                      warn_input_not_reused=config.DebugMode.warn_input_not_reused)
+                        c_inplace_outs = _check_inputs(
+                            node, storage_map, r_vals,
+                            dr_vals, active_order_set,
+                            clobber_dr_vals=clobber, perform='c',
+                            warn_input_not_reused=config.DebugMode.warn_input_not_reused)
                         _check_viewmap(node, storage_map)
@@ -1848,7 +1895,8 @@ class _Linker(gof.link.LocalLinker):
                                     r_vals=r_vals,
                                     dr_vals=dr_vals,
                                     perform='c code',
-                                    active_order_set=active_order_set)
+                                    active_order_set=active_order_set,
+                                    inplace_outs=c_inplace_outs)
 
                         # print >> sys.stderr, i, "DEBUGMODE thunk_c %100s %50s %30s" % (node,
                         #[(id(o), numpy.asarray(storage_map[o][0])[0,0]) for o in node.inputs],