Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
918c51cb
提交
918c51cb
authored
10月 22, 2014
作者:
Frédéric Bastien
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2195 from daemonmaker/issue1903
Updated local_alloc_elemwise to remove all allocs when possible and to a...
上级
165eb4e6
3867a916
隐藏空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
247 行增加
和
58 行删除
+247
-58
config.txt
doc/library/config.txt
+14
-0
opt.py
theano/tensor/opt.py
+83
-58
test_opt.py
theano/tensor/tests/test_opt.py
+150
-0
没有找到文件。
doc/library/config.txt
浏览文件 @
918c51cb
...
@@ -462,6 +462,20 @@ import theano and print the config variable, as in:
...
@@ -462,6 +462,20 @@ import theano and print the config variable, as in:
Link arguments to link against a (Fortran) level-3 blas implementation.
Link arguments to link against a (Fortran) level-3 blas implementation.
.. attribute:: config.experimental.local_alloc_elemwise_assert
Bool value: either True or False
Default: True
When the local_alloc_elemwise optimization is applied, add an assert to highlight
shape errors.
Without such asserts this optimization could hide errors in the user code.
We add the assert only if we can't infer that the shapes are equivalent.
As such this optimization does not always introduce an assert in the graph.
Removing the assert could speed up execution.
.. attribute:: config.cuda.root
.. attribute:: config.cuda.root
Default: $CUDA_ROOT or failing that, "/usr/local/cuda"
Default: $CUDA_ROOT or failing that, "/usr/local/cuda"
...
...
theano/tensor/opt.py
浏览文件 @
918c51cb
...
@@ -1408,6 +1408,7 @@ def local_useless_elemwise(node):
...
@@ -1408,6 +1408,7 @@ def local_useless_elemwise(node):
return
[
node
.
inputs
[
0
]]
return
[
node
.
inputs
[
0
]]
if
node
.
op
.
scalar_op
==
theano
.
scalar
.
add
and
len
(
node
.
inputs
)
==
1
:
if
node
.
op
.
scalar_op
==
theano
.
scalar
.
add
and
len
(
node
.
inputs
)
==
1
:
return
[
node
.
inputs
[
0
]]
return
[
node
.
inputs
[
0
]]
if
(
node
.
op
.
scalar_op
==
theano
.
scalar
.
identity
if
(
node
.
op
.
scalar_op
==
theano
.
scalar
.
identity
and
len
(
node
.
inputs
)
==
1
):
and
len
(
node
.
inputs
)
==
1
):
return
[
node
.
inputs
[
0
]]
return
[
node
.
inputs
[
0
]]
...
@@ -1529,14 +1530,15 @@ def local_remove_useless_assert(node):
...
@@ -1529,14 +1530,15 @@ def local_remove_useless_assert(node):
return
[
assert_
(
node
.
inputs
[
0
],
*
cond
)]
return
[
assert_
(
node
.
inputs
[
0
],
*
cond
)]
@register_specialize
@gof.local_optimizer
([
T
.
Elemwise
])
@gof.local_optimizer
([
T
.
Elemwise
])
def
local_alloc_elemwise
(
node
):
def
local_alloc_elemwise
(
node
):
"""
"""
elemwise(alloc(x, shp), ..., y.TensorType(BROADCAST CONDITION))
elemwise(alloc(x, shp), ..., y.TensorType(BROADCAST CONDITION))
-> elemwise(x, y.TensorType(
no broadcast flag
))
-> elemwise(x, y.TensorType(
BROADCAST CONDITION
))
elemwise(dimshuffle(alloc(x, shp)),... ,y.TensorType(BROADCAST CONDITION))
elemwise(dimshuffle(alloc(x, shp)),... ,y.TensorType(BROADCAST CONDITION))
-> elemwise(x
, y.TensorType(no broadcast flag
))
-> elemwise(x
.dimshuffle(...), y.TensorType(BROADCAST CONDITION
))
BROADCAST CONDITION: the condition is that the one input that is
BROADCAST CONDITION: the condition is that the one input that is
not to be optimized must have the same broadcast pattern as the
not to be optimized must have the same broadcast pattern as the
...
@@ -1548,99 +1550,122 @@ def local_alloc_elemwise(node):
...
@@ -1548,99 +1550,122 @@ def local_alloc_elemwise(node):
"""
"""
if
not
isinstance
(
node
.
op
,
T
.
Elemwise
):
if
not
isinstance
(
node
.
op
,
T
.
Elemwise
):
return
False
return
False
if
len
(
node
.
outputs
)
>
1
:
if
len
(
node
.
outputs
)
>
1
:
#This is a supposition this code makes that I'm not sure is always true.
# Ensure all outputs have the same broadcast pattern
assert
all
([
list
(
o
.
type
.
broadcastable
)
==
list
(
# This is a supposition that I'm not sure is always true.
node
.
outputs
[
0
]
.
type
.
broadcastable
)
for
o
in
assert
all
([
o
.
type
.
broadcastable
==
node
.
outputs
[
0
]
.
type
.
broadcastable
for
o
in
node
.
outputs
[
1
:]])
node
.
outputs
[
1
:]])
if
not
any
([
list
(
i
.
type
.
broadcastable
)
==
list
(
# The broadcast pattern of the output must match the broadcast pattern of
node
.
outputs
[
0
]
.
type
.
broadcastable
)
for
i
in
node
.
inputs
]):
# at least one of the inputs.
if
not
any
([
i
.
type
.
broadcastable
==
node
.
outputs
[
0
]
.
type
.
broadcastable
for
i
in
node
.
inputs
]):
return
False
return
False
if
not
any
([
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
T
.
Alloc
)
or
\
(
isinstance
(
i
.
owner
.
op
,
T
.
DimShuffle
)
and
def
dimshuffled_alloc
(
i
):
i
.
owner
.
inputs
[
0
]
.
owner
and
\
return
(
isinstance
(
i
.
owner
.
op
,
T
.
DimShuffle
)
and
isinstance
(
i
.
owner
.
inputs
[
0
]
.
owner
.
op
,
T
.
Alloc
)))
i
.
owner
.
inputs
[
0
]
.
owner
and
\
isinstance
(
i
.
owner
.
inputs
[
0
]
.
owner
.
op
,
T
.
Alloc
))
# At least one input must have an owner that is either a T.Alloc or a
# T.DimShuffle with an owner that is a T.Alloc -- otherwise there is
# nothing to optimize.
if
not
any
([
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
T
.
Alloc
)
or
dimshuffled_alloc
(
i
))
for
i
in
node
.
inputs
]):
for
i
in
node
.
inputs
]):
return
False
return
False
no_broad_idx
=
-
1
## Search for input that we can use as a baseline for the dimensions.
assert_op_idx
=
-
1
for
idx
,
i
in
enumerate
(
node
.
inputs
):
for
idx
,
i
in
enumerate
(
node
.
inputs
):
if
not
i
.
owner
:
if
i
.
type
.
broadcastable
==
node
.
outputs
[
0
]
.
type
.
broadcastable
:
if
list
(
i
.
type
.
broadcastable
)
==
[
False
,
]
*
i
.
type
.
ndim
:
# Prefer an input that is not a T.Alloc nor a T.DimShuffle of a
no_broad_idx
=
idx
# T.Alloc so that all allocs can be optimized.
if
not
(
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
T
.
Alloc
)
or
dimshuffled_alloc
(
i
))):
assert_op_idx
=
idx
break
break
else
:
continue
if
not
any
(
i
.
type
.
broadcastable
)
and
not
isinstance
(
i
.
owner
.
op
,
T
.
Alloc
):
no_broad_idx
=
idx
break
elif
list
(
i
.
type
.
broadcastable
)
==
list
(
node
.
outputs
[
0
]
.
type
.
broadcastable
)
\
and
not
isinstance
(
i
.
owner
.
op
,
T
.
Alloc
)
\
and
not
(
isinstance
(
i
.
owner
.
op
,
T
.
DimShuffle
)
and
i
.
owner
.
inputs
[
0
]
.
owner
and
\
isinstance
(
i
.
owner
.
inputs
[
0
]
.
owner
.
op
,
T
.
Alloc
)):
no_broad_idx
=
idx
break
assert
no_broad_idx
>=
0
# It may be the case that only T.Allocs and T.DimShuffle of T.Allocs exist.
assert_op
=
node
.
inputs
[
no_broad_idx
]
if
assert_op_idx
<
0
:
# We want to optimize as many allocs as possible. When there is more
# than one then do all but one.
# number of inputs with alloc or dimshuffle alloc
l2
=
[
i
for
i
in
node
.
inputs
if
(
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
T
.
Alloc
)
or
dimshuffled_alloc
(
i
)))]
# If only 1 alloc or dimshuffle alloc, it is the one we will use for the shape
# So no alloc would be removed.
if
len
(
l2
)
>
1
:
# l contains the indices of inputs whose broadcast pattern matches the output's.
# Its length will always be at least one, as we checked that before
l
=
[
idx
for
idx
,
i
in
enumerate
(
node
.
inputs
)
if
i
.
type
.
broadcastable
==
node
.
outputs
[
0
]
.
type
.
broadcastable
]
assert_op_idx
=
l
[
0
]
# The first one is as good as any to use.
else
:
# Nothing would be optimized!
return
False
assert_op
=
node
.
inputs
[
assert_op_idx
]
cmp_op
=
assert_op
cmp_op
=
assert_op
new
=
[]
new
_i
=
[]
for
i
in
node
.
inputs
:
for
i
in
node
.
inputs
:
# Remove alloc
if
(
i
.
owner
and
isinstance
(
i
.
owner
.
op
,
T
.
Alloc
)
if
(
i
.
owner
and
isinstance
(
i
.
owner
.
op
,
T
.
Alloc
)
and
i
.
owner
.
inputs
[
0
]
.
type
!=
i
.
owner
.
outputs
[
0
]
.
type
):
and
i
.
owner
.
inputs
[
0
]
.
type
!=
i
.
owner
.
outputs
[
0
]
.
type
):
# when i.owner.inputs[0].type == i.owner.outputs[0].type we
# when i.owner.inputs[0].type == i.owner.outputs[0].type we
# will remove that alloc later
# will remove that alloc later
assert
i
.
type
.
ndim
==
cmp_op
.
ndim
assert
i
.
type
.
ndim
==
cmp_op
.
ndim
if
theano
.
config
.
experimental
.
local_alloc_elemwise_assert
:
if
(
theano
.
config
.
experimental
.
local_alloc_elemwise_assert
and
not
node
.
fgraph
.
shape_feature
.
same_shape
(
i
,
cmp_op
)):
assert_op
=
assert_
(
assert_op
,
assert_op
=
assert_
(
assert_op
,
*
[
T
.
eq
(
i
.
shape
[
idx
],
cmp_op
.
shape
[
idx
])
\
*
[
T
.
eq
(
i
.
shape
[
idx
],
cmp_op
.
shape
[
idx
])
\
for
idx
in
xrange
(
i
.
type
.
ndim
)
\
for
idx
in
xrange
(
i
.
type
.
ndim
)
\
if
not
i
.
type
.
broadcastable
[
idx
]])
if
not
i
.
type
.
broadcastable
[
idx
]])
new
.
append
(
i
.
owner
.
inputs
[
0
])
new_i
.
append
(
i
.
owner
.
inputs
[
0
])
elif
i
.
owner
and
isinstance
(
i
.
owner
.
op
,
T
.
DimShuffle
)
\
and
i
.
owner
.
inputs
[
0
]
.
owner
\
# Remove Alloc in DimShuffle
and
isinstance
(
i
.
owner
.
inputs
[
0
]
.
owner
.
op
,
T
.
Alloc
):
elif
i
.
owner
and
dimshuffled_alloc
(
i
):
assert
i
.
type
.
ndim
==
cmp_op
.
type
.
ndim
assert
i
.
type
.
ndim
==
cmp_op
.
type
.
ndim
if
theano
.
config
.
experimental
.
local_alloc_elemwise_assert
:
if
(
theano
.
config
.
experimental
.
local_alloc_elemwise_assert
and
not
node
.
fgraph
.
shape_feature
.
same_shape
(
i
,
cmp_op
)):
assert_op
=
assert_
(
assert_op
,
assert_op
=
assert_
(
assert_op
,
*
[
T
.
eq
(
i
.
shape
[
idx
],
cmp_op
.
shape
[
idx
])
*
[
T
.
eq
(
i
.
shape
[
idx
],
cmp_op
.
shape
[
idx
])
for
idx
in
xrange
(
i
.
type
.
ndim
)
for
idx
in
xrange
(
i
.
type
.
ndim
)
if
not
i
.
type
.
broadcastable
[
idx
]])
if
not
i
.
type
.
broadcastable
[
idx
]])
new
.
append
(
i
.
owner
.
inputs
[
0
]
.
owner
.
inputs
[
0
])
new
_i
.
append
(
i
.
owner
.
inputs
[
0
]
.
owner
.
inputs
[
0
])
else
:
else
:
new
.
append
(
i
)
new_i
.
append
(
i
)
new
[
no_broad_idx
]
=
assert_op
new_i
[
assert_op_idx
]
=
assert_op
if
theano
.
config
.
experimental
.
local_alloc_elemwise_assert
:
assert
assert_op
.
owner
.
op
is
assert_
return
node
.
op
(
*
new_i
,
return_list
=
True
)
return
[
node
.
op
(
*
new
)]
#TODO, global optimizer that lifts the assert to the beginning of the graph.
#TODO, global optimizer that lifts the assert to the beginning of the graph.
#TODO, when all inputs can be optimized do all except one
#TODO, optimize all inputs when possible -- currently when all inputs have
# an alloc all but one is optimized.
theano
.
configparser
.
AddConfigVar
(
'experimental.local_alloc_elemwise'
,
theano
.
configparser
.
AddConfigVar
(
'experimental.local_alloc_elemwise'
,
"If True enable the experimental optimization local_alloc_elemwise"
,
"DEPRECATED: If True, enable the experimental"
theano
.
configparser
.
BoolParam
(
False
),
" optimization local_alloc_elemwise."
in_c_key
=
False
)
" Generates error if not True. Use"
#This version is faster but not as safe.
" optimizer_excluding=local_alloc_elemwise"
" to dsiable."
,
theano
.
configparser
.
BoolParam
(
True
,
is_valid
=
lambda
x
:
x
),
in_c_key
=
False
)
#This version is faster but not as safe.
theano
.
configparser
.
AddConfigVar
(
'experimental.local_alloc_elemwise_assert'
,
theano
.
configparser
.
AddConfigVar
(
'experimental.local_alloc_elemwise_assert'
,
"If False enable the experimental optimization local_alloc_elemwise"
"If False enable the experimental optimization local_alloc_elemwise"
" but WITHOUT assert into the graph!"
,
" but WITHOUT assert into the graph!"
,
theano
.
configparser
.
BoolParam
(
True
),
theano
.
configparser
.
BoolParam
(
True
),
in_c_key
=
False
)
in_c_key
=
False
)
if
theano
.
config
.
experimental
.
local_alloc_elemwise
:
#enabled by default when the lifter of assert is done.
register_specialize
(
local_alloc_elemwise
)
else
:
#Don't register them in fast_run, so that they are disabled
#by default, as we are not sure it is
#always a good idea to replace an alloc with multiple ops.
compile
.
optdb
[
'specialize'
]
.
register
(
"local_alloc_elemwise"
,
local_alloc_elemwise
)
############################
############################
# Constant Canonicalization
# Constant Canonicalization
...
...
theano/tensor/tests/test_opt.py
浏览文件 @
918c51cb
...
@@ -2512,6 +2512,156 @@ def test_local_subtensor_of_dot():
...
@@ -2512,6 +2512,156 @@ def test_local_subtensor_of_dot():
f
=
theano
.
function
([
m1
,
m2
,
idx
],
theano
.
dot
(
m1
,
m2
)[
1
:
4
,:,
idx
:,
idx
],
mode
=
mode
)
f
=
theano
.
function
([
m1
,
m2
,
idx
],
theano
.
dot
(
m1
,
m2
)[
1
:
4
,:,
idx
:,
idx
],
mode
=
mode
)
assert
test_equality
(
f
(
d1
,
d2
,
1
),
numpy
.
dot
(
d1
,
d2
)[
1
:
4
,:,
1
:,
1
])
assert
test_equality
(
f
(
d1
,
d2
,
1
),
numpy
.
dot
(
d1
,
d2
)[
1
:
4
,:,
1
:,
1
])
class
Test_local_alloc_elemwise
(
unittest
.
TestCase
):
dtype
=
config
.
floatX
def
setUp
(
self
):
self
.
vec
=
T
.
vector
(
'vec'
,
dtype
=
theano
.
config
.
floatX
)
self
.
mat
=
T
.
matrix
(
'mat'
,
dtype
=
theano
.
config
.
floatX
)
self
.
tens
=
T
.
tensor3
(
'tens'
,
dtype
=
theano
.
config
.
floatX
)
self
.
alloc_wo_dep
=
T
.
alloc
(
self
.
vec
,
2
,
2
)
self
.
alloc_w_dep
=
T
.
alloc
(
self
.
vec
,
*
self
.
mat
.
shape
)
def
_verify_alloc_count
(
self
,
f
,
count
):
assert
(
sum
([
isinstance
(
elem
.
op
,
T
.
Alloc
)
for
elem
in
f
.
maker
.
fgraph
.
toposort
()
if
elem
.
op
is
not
None
])
==
count
)
def
_verify_assert_count
(
self
,
f
,
count
):
assert
(
sum
([
isinstance
(
elem
.
op
,
T
.
opt
.
Assert
)
for
elem
in
f
.
maker
.
fgraph
.
toposort
()
if
elem
.
op
is
not
None
])
==
count
)
def
test_remove_alloc_wo_dimshuffle
(
self
):
# No optimization on alloc
func
=
function
(
[
self
.
vec
,
self
.
mat
],
self
.
alloc_wo_dep
+
self
.
mat
,
mode
=
'FAST_COMPILE'
)
self
.
_verify_alloc_count
(
func
,
1
)
self
.
_verify_assert_count
(
func
,
0
)
# Optimization on alloc with assert
func
=
function
(
[
self
.
vec
,
self
.
mat
],
self
.
alloc_wo_dep
+
self
.
mat
,
mode
=
'FAST_RUN'
)
self
.
_verify_alloc_count
(
func
,
0
)
self
.
_verify_assert_count
(
func
,
1
)
# No optimization on alloc without assert
func
=
function
(
[
self
.
vec
,
self
.
mat
],
self
.
alloc_w_dep
+
self
.
mat
,
mode
=
'FAST_COMPILE'
)
self
.
_verify_alloc_count
(
func
,
1
)
self
.
_verify_assert_count
(
func
,
0
)
# Optimization on alloc without assert
func
=
function
(
[
self
.
vec
,
self
.
mat
],
self
.
alloc_w_dep
+
self
.
mat
,
mode
=
'FAST_RUN'
)
self
.
_verify_alloc_count
(
func
,
0
)
self
.
_verify_assert_count
(
func
,
0
)
def
test_remove_alloc_w_dimshuffle
(
self
):
# No optimization on dimshuffle with assert
func
=
function
(
[
self
.
vec
,
self
.
tens
],
T
.
alloc
(
self
.
vec
,
2
,
2
)
.
dimshuffle
(
0
,
1
,
'x'
)
+
self
.
tens
,
mode
=
'FAST_COMPILE'
)
self
.
_verify_alloc_count
(
func
,
1
)
self
.
_verify_assert_count
(
func
,
0
)
# Optimization on dimshuffle with assert
func
=
function
(
[
self
.
vec
,
self
.
tens
],
T
.
alloc
(
self
.
vec
,
2
,
2
)
.
dimshuffle
(
0
,
1
,
'x'
)
+
self
.
tens
,
mode
=
'FAST_RUN'
)
self
.
_verify_alloc_count
(
func
,
0
)
self
.
_verify_assert_count
(
func
,
1
)
# No optimization on dimshuffle without assert
func
=
function
(
[
self
.
vec
,
self
.
tens
],
T
.
alloc
(
self
.
vec
,
self
.
tens
.
shape
[
0
],
self
.
tens
.
shape
[
1
]
)
.
dimshuffle
(
0
,
1
,
'x'
)
+
self
.
tens
,
mode
=
'FAST_COMPILE'
)
self
.
_verify_alloc_count
(
func
,
1
)
self
.
_verify_assert_count
(
func
,
0
)
# Optimization on dimshuffle without assert
func
=
function
(
[
self
.
vec
,
self
.
tens
],
T
.
alloc
(
self
.
vec
,
self
.
tens
.
shape
[
0
],
self
.
tens
.
shape
[
1
]
)
.
dimshuffle
(
0
,
1
,
'x'
)
+
self
.
tens
,
mode
=
'FAST_RUN'
)
self
.
_verify_alloc_count
(
func
,
0
)
self
.
_verify_assert_count
(
func
,
0
)
def
test_multi_input_single_alloc
(
self
):
tv
=
T
.
alloc
(
self
.
vec
,
5
,
5
)
tm
=
T
.
alloc
(
self
.
mat
,
5
,
5
,
5
)
func
=
function
(
[
self
.
vec
,
self
.
mat
],
tv
+
tm
,
mode
=
'FAST_COMPILE'
)
self
.
_verify_alloc_count
(
func
,
2
)
self
.
_verify_assert_count
(
func
,
0
)
func
=
function
(
[
self
.
vec
,
self
.
mat
],
tv
+
tm
,
mode
=
'FAST_RUN'
)
self
.
_verify_alloc_count
(
func
,
1
)
self
.
_verify_assert_count
(
func
,
0
)
s
=
T
.
iscalar
(
's'
)
tv
=
T
.
alloc
(
self
.
vec
,
s
,
s
)
tm
=
T
.
alloc
(
self
.
mat
,
5
,
5
,
5
)
func
=
function
(
[
self
.
vec
,
self
.
mat
,
s
],
tv
+
tm
,
mode
=
'FAST_COMPILE'
)
self
.
_verify_alloc_count
(
func
,
2
)
self
.
_verify_assert_count
(
func
,
0
)
func
=
function
(
[
self
.
vec
,
self
.
mat
,
s
],
tv
+
tm
,
mode
=
'FAST_RUN'
)
self
.
_verify_alloc_count
(
func
,
1
)
self
.
_verify_assert_count
(
func
,
1
)
def
test_local_subtensor_of_alloc
():
def
test_local_subtensor_of_alloc
():
# DebugMode should detect if something goes wrong.
# DebugMode should detect if something goes wrong.
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论