Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
f0bd940e
提交
f0bd940e
authored
10月 19, 2015
作者:
Pascal Lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3477 from nouiz/crash_gpu
Crash gpu and opt speed up
上级
dab522df
7fce44ca
隐藏空白字符变更
内嵌
并排
正在显示
15 个修改的文件
包含
403 行增加
和
273 行删除
+403
-273
optimization.txt
doc/extending/optimization.txt
+3
-3
mode.py
theano/compile/mode.py
+10
-1
cc.py
theano/gof/cc.py
+2
-2
opt.py
theano/gof/opt.py
+134
-42
optdb.py
theano/gof/optdb.py
+14
-7
basic_ops.py
theano/sandbox/cuda/basic_ops.py
+1
-1
cudnn_helper.h
theano/sandbox/cuda/cudnn_helper.h
+3
-3
dnn.py
theano/sandbox/cuda/dnn.py
+1
-1
opt.py
theano/sandbox/cuda/opt.py
+1
-1
type.py
theano/sandbox/cuda/type.py
+1
-1
basic.py
theano/tensor/basic.py
+2
-2
opt.py
theano/tensor/opt.py
+212
-204
downsample.py
theano/tensor/signal/downsample.py
+4
-1
test_downsample.py
theano/tensor/signal/tests/test_downsample.py
+10
-0
test_opt.py
theano/tensor/tests/test_opt.py
+5
-4
没有找到文件。
doc/extending/optimization.txt
浏览文件 @
f0bd940e
...
...
@@ -212,11 +212,11 @@ optimization you wrote. For example, consider the following:
Nothing happened here. The reason is: ``add(y, z) != add(y,
z)``. That is the case for efficiency reasons. To fix this problem we
first need to merge the parts of the graph that represent the same
computation, using the ``
merge_o
ptimizer`` defined in
computation, using the ``
MergeO
ptimizer`` defined in
``theano.gof.opt``.
>>> from theano.gof.opt import
merge_o
ptimizer
>>>
merge_optimizer
.optimize(e) # doctest: +ELLIPSIS
>>> from theano.gof.opt import
MergeO
ptimizer
>>>
MergeOptimizer()
.optimize(e) # doctest: +ELLIPSIS
(0, ..., None, None, {}, 1, 0)
>>> e
[true_div(mul(*1 -> add(y, z), x), *1)]
...
...
theano/compile/mode.py
浏览文件 @
f0bd940e
...
...
@@ -198,8 +198,17 @@ optdb.register('merge1', gof.MergeOptimizer(),
0
,
'fast_run'
,
'fast_compile'
,
'merge'
)
# rearranges elemwise expressions
optdb
.
register
(
'canonicalize'
,
gof
.
EquilibriumDB
(),
optdb
.
register
(
'canonicalize'
,
gof
.
EquilibriumDB
(
ignore_newtrees
=
False
),
1
,
'fast_run'
,
'fast_compile'
)
# Register the merge optimizer in the canonicalize EquilibriumDB as a cleanup opt.
# Without this, because that equilibrium uses ignore_newtrees=False, we
# would not merge all nodes if it were only set as a global optimizer with
# final_opt=True.
# We need a new instance of MergeOptimizer so that its name is not
# changed by other uses of it.
optdb
[
'canonicalize'
]
.
register
(
"merge"
,
gof
.
opt
.
MergeOptimizer
(),
'fast_run'
,
"fast_compile"
,
cleanup
=
True
)
optdb
.
register
(
'merge1.2'
,
gof
.
MergeOptimizer
(),
1.2
,
'fast_run'
,
'fast_compile'
,
'merge'
)
...
...
theano/gof/cc.py
浏览文件 @
f0bd940e
...
...
@@ -547,6 +547,7 @@ class CLinker(link.Linker):
if
no_recycling
is
None
:
no_recycling
=
[]
if
self
.
fgraph
is
not
None
and
self
.
fgraph
is
not
fgraph
:
# A linker can be tied to only one FunctionGraph.
return
type
(
self
)(
self
.
schedule
)
.
accept
(
fgraph
,
no_recycling
)
self
.
fgraph
=
fgraph
self
.
fetch_variables
()
...
...
@@ -1750,14 +1751,13 @@ class OpWiseCLinker(link.LocalLinker):
if
no_recycling
is
None
:
no_recycling
=
[]
if
self
.
fgraph
is
not
None
and
self
.
fgraph
is
not
fgraph
:
# A linker can be tied to only one FunctionGraph.
return
type
(
self
)(
fallback_on_perform
=
self
.
fallback_on_perform
,
allow_gc
=
self
.
allow_gc
,
nice_errors
=
self
.
nice_errors
,
schedule
=
self
.
schedule
,
)
.
accept
(
fgraph
,
no_recycling
)
# raise Exception("Cannot accept from a Linker that is
# already tied to another FunctionGraph.")
self
.
fgraph
=
fgraph
self
.
no_recycling
=
no_recycling
return
self
...
...
theano/gof/opt.py
浏览文件 @
f0bd940e
...
...
@@ -873,8 +873,23 @@ class MergeOptimizer(Optimizer):
if
i
[
1
]
>
0
:
print
(
i
)
merge_optimizer
=
MergeOptimizer
()
@staticmethod
def merge_profile(prof1, prof2):
    """
    Combine two merge-optimizer profiles into a single profile.

    Both profiles are read positionally as ``(nb_fail, replace_time,
    validate_time, callback_time, callbacks_time, nb_merged,
    nb_constant)``.

    Entries 0, 1, 5 and 6 are added together. Entries 2 and 3 are added
    unless one of them is None, in which case the other one is used.
    Entry 4 (a dict) is combined with ``merge_dict``.

    Returns
    -------
    tuple
        The combined 7-element profile, in the same order as the inputs.

    """
    def add_optional(left, right):
        # A None entry contributes nothing: fall back on the other value.
        if left is None:
            return right
        return left if right is None else left + right

    return (prof1[0] + prof2[0],                # nb_fail
            prof1[1] + prof2[1],                # replace_time
            add_optional(prof1[2], prof2[2]),   # validate_time
            add_optional(prof1[3], prof2[3]),   # callback_time
            merge_dict(prof1[4], prof2[4]),     # callbacks_time
            prof1[5] + prof2[5],                # nb_merged
            prof1[6] + prof2[6])                # nb_constant
def
is_same_graph_with_merge
(
var1
,
var2
,
givens
=
None
):
...
...
@@ -899,7 +914,7 @@ def is_same_graph_with_merge(var1, var2, givens=None):
for
to_replace
,
replace_by
in
iteritems
(
givens
):
fgraph
.
replace
(
to_replace
,
replace_by
)
# Perform merge optimization.
merge_optimizer
.
optimize
(
fgraph
)
MergeOptimizer
()
.
optimize
(
fgraph
)
# When two variables perform the same computations, they will have the same
# owner in the optimized graph.
# We need to be careful with the special case where the owner is None,
...
...
@@ -1165,7 +1180,7 @@ class FromFunctionLocalOptimizer(LocalOptimizer):
id
(
self
)),
file
=
stream
)
def
local_optimizer
(
tracks
,
inplace
=
False
):
def
local_optimizer
(
tracks
,
inplace
=
False
,
requirements
=
()
):
def
decorator
(
f
):
"""
WRITEME
...
...
@@ -1177,12 +1192,13 @@ def local_optimizer(tracks, inplace=False):
for
t
in
tracks
:
if
not
(
isinstance
(
t
,
op
.
Op
)
or
issubclass
(
t
,
op
.
PureOp
)):
raise
ValueError
(
"Tracks are op classes or instances"
,
f
.
__module__
,
f
.
__name__
)
req
uirements
=
()
req
=
requirements
if
inplace
:
dh_handler
=
dh
.
DestroyHandler
requirements
=
(
lambda
fgraph
:
fgraph
.
attach_feature
(
dh_handler
()),)
rval
=
FromFunctionLocalOptimizer
(
f
,
tracks
,
requirements
)
req
=
tuple
(
requirements
)
+
(
lambda
fgraph
:
fgraph
.
attach_feature
(
dh_handler
()),)
rval
=
FromFunctionLocalOptimizer
(
f
,
tracks
,
req
)
rval
.
__name__
=
f
.
__name__
return
rval
return
decorator
...
...
@@ -1974,19 +1990,41 @@ class ChangeTracker:
fgraph
.
change_tracker
=
self
def merge_dict(d1, d2):
    """
    Merge 2 dicts by adding the values of the keys they share.

    Parameters
    ----------
    d1 : dict
        First dict. It is copied (shallow ``copy()``), never modified.
    d2 : dict
        Second dict. Its values are added to those of `d1` for common
        keys and inserted as-is for the other keys.

    Returns
    -------
    dict
        A new dict (created by ``d1.copy()``) holding the merged values.

    """
    merged = d1.copy()
    for key, value in d2.items():
        if key in merged:
            # Rebind instead of using ``+=``: the copy is shallow, so an
            # in-place add on a mutable value (e.g. a list) would also
            # modify the value stored in ``d1``.
            merged[key] = merged[key] + value
        else:
            merged[key] = value
    return merged
class
EquilibriumOptimizer
(
NavigatorOptimizer
):
"""
Apply optimizations until equilibrium point.
Parameters
----------
optimizers
List or set of local or global optimizations to apply until equilibrium.
max_use_ratio
optimizers : list or set
Local or global optimizations to apply until equilibrium.
The global optimizer will be run at the start of each iteration before
the local optimizer.
max_use_ratio : int or float
Each optimizer can be applied at most (size of graph * this number)
times.
ignore_newtrees
See EquilibriumDB ignore_newtrees parameter definition.
final_optimizers
Global optimizers that will be run after each iteration.
cleanup_optimizers
Global optimizers that apply a list of predetermined optimizations.
They must not traverse the graph, as they are called very frequently.
The MergeOptimizer is one example of an optimization that respects this.
They are applied after all global optimizers, then each time a local optimizer is applied, then after all final optimizers.
"""
...
...
@@ -1995,7 +2033,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
failure_callback
=
None
,
ignore_newtrees
=
True
,
max_use_ratio
=
None
,
final_optimizers
=
None
):
final_optimizers
=
None
,
cleanup_optimizers
=
None
):
super
(
EquilibriumOptimizer
,
self
)
.
__init__
(
None
,
ignore_newtrees
=
ignore_newtrees
,
...
...
@@ -2004,6 +2043,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
self
.
local_optimizers_all
=
[]
self
.
global_optimizers
=
[]
self
.
final_optimizers
=
[]
self
.
cleanup_optimizers
=
[]
for
opt
in
optimizers
:
if
isinstance
(
opt
,
LocalOptimizer
):
...
...
@@ -2016,6 +2056,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
self
.
global_optimizers
.
append
(
opt
)
if
final_optimizers
:
self
.
final_optimizers
=
final_optimizers
if
cleanup_optimizers
:
self
.
cleanup_optimizers
=
cleanup_optimizers
self
.
max_use_ratio
=
max_use_ratio
assert
self
.
max_use_ratio
is
not
None
,
(
'max_use_ratio has to be a number'
)
...
...
@@ -2039,6 +2081,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
opt
.
add_requirements
(
fgraph
)
for
opt
in
self
.
final_optimizers
:
opt
.
add_requirements
(
fgraph
)
for
opt
in
self
.
cleanup_optimizers
:
opt
.
add_requirements
(
fgraph
)
def
apply
(
self
,
fgraph
,
start_from
=
None
):
change_tracker
=
ChangeTracker
()
...
...
@@ -2066,17 +2110,39 @@ class EquilibriumOptimizer(NavigatorOptimizer):
node_created
=
{}
global_sub_profs
=
[]
final_sub_profs
=
[]
cleanup_sub_profs
=
[]
for
opt
in
(
self
.
global_optimizers
+
list
(
self
.
get_local_optimizers
())
+
self
.
final_optimizers
):
self
.
final_optimizers
+
self
.
cleanup_optimizers
):
global_process_count
.
setdefault
(
opt
,
0
)
time_opts
.
setdefault
(
opt
,
0
)
node_created
.
setdefault
(
opt
,
0
)
def
apply_cleanup
(
profs_dict
):
changed
=
False
for
copt
in
self
.
cleanup_optimizers
:
change_tracker
.
reset
()
nb
=
change_tracker
.
nb_imported
t_opt
=
time
.
time
()
sub_prof
=
copt
.
apply
(
fgraph
)
time_opts
[
copt
]
+=
time
.
time
()
-
t_opt
profs_dict
[
copt
]
.
append
(
sub_prof
)
if
change_tracker
.
changed
:
process_count
.
setdefault
(
copt
,
0
)
process_count
[
copt
]
+=
1
global_process_count
[
copt
]
+=
1
changed
=
True
node_created
[
copt
]
+=
change_tracker
.
nb_imported
-
nb
return
changed
while
changed
and
not
max_use_abort
:
process_count
=
{}
t0
=
time
.
time
()
changed
=
False
iter_cleanup_sub_profs
=
{}
for
copt
in
self
.
cleanup_optimizers
:
iter_cleanup_sub_profs
[
copt
]
=
[]
# apply global optimizers
sub_profs
=
[]
...
...
@@ -2101,6 +2167,10 @@ class EquilibriumOptimizer(NavigatorOptimizer):
global_opt_timing
.
append
(
float
(
time
.
time
()
-
t0
))
# Apply the cleanup optimizers, as the global optimizers may have
# made changes that require them.
changed
|=
apply_cleanup
(
iter_cleanup_sub_profs
)
# apply local optimizer
topo_t0
=
time
.
time
()
q
=
deque
(
graph
.
io_toposort
(
fgraph
.
inputs
,
start_from
))
...
...
@@ -2134,19 +2204,21 @@ class EquilibriumOptimizer(NavigatorOptimizer):
t_opt
=
time
.
time
()
lopt_change
=
self
.
process_node
(
fgraph
,
node
,
lopt
)
time_opts
[
lopt
]
+=
time
.
time
()
-
t_opt
if
lopt_change
:
process_count
.
setdefault
(
lopt
,
0
)
process_count
[
lopt
]
+=
1
global_process_count
[
lopt
]
+=
1
changed
=
True
node_created
[
lopt
]
+=
change_tracker
.
nb_imported
-
nb
if
global_process_count
[
lopt
]
>
max_use
:
max_use_abort
=
True
opt_name
=
(
getattr
(
lopt
,
"name"
,
None
)
or
getattr
(
lopt
,
"__name__"
,
""
))
if
node
not
in
fgraph
.
apply_nodes
:
# go to next node
break
if
not
lopt_change
:
continue
process_count
.
setdefault
(
lopt
,
0
)
process_count
[
lopt
]
+=
1
global_process_count
[
lopt
]
+=
1
changed
=
True
node_created
[
lopt
]
+=
change_tracker
.
nb_imported
-
nb
changed
|=
apply_cleanup
(
iter_cleanup_sub_profs
)
if
global_process_count
[
lopt
]
>
max_use
:
max_use_abort
=
True
opt_name
=
(
getattr
(
lopt
,
"name"
,
None
)
or
getattr
(
lopt
,
"__name__"
,
""
))
if
node
not
in
fgraph
.
apply_nodes
:
# go to next node
break
finally
:
self
.
detach_updater
(
fgraph
,
u
)
...
...
@@ -2173,6 +2245,17 @@ class EquilibriumOptimizer(NavigatorOptimizer):
final_sub_profs
.
append
(
sub_profs
)
global_opt_timing
[
-
1
]
+=
time
.
time
()
-
t_before_final_opt
# Apply the cleanup optimizers, as the final optimizers may have
# made changes that require them.
changed
|=
apply_cleanup
(
iter_cleanup_sub_profs
)
# merge clean up profiles during that iteration.
c_sub_profs
=
[]
for
copt
,
sub_profs
in
iteritems
(
iter_cleanup_sub_profs
):
sub_prof
=
sub_profs
[
0
]
for
s_p
in
sub_profs
[
1
:]:
sub_prof
=
copt
.
merge_profile
(
sub_prof
,
s_p
)
c_sub_profs
.
append
(
sub_prof
)
cleanup_sub_profs
.
append
(
c_sub_profs
)
loop_process_count
.
append
(
process_count
)
loop_timing
.
append
(
float
(
time
.
time
()
-
t0
))
...
...
@@ -2188,7 +2271,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
return
(
self
,
loop_timing
,
loop_process_count
,
(
start_nb_nodes
,
end_nb_nodes
,
max_nb_nodes
),
global_opt_timing
,
nb_nodes
,
time_opts
,
io_toposort_timing
,
node_created
,
global_sub_profs
,
final_sub_profs
)
node_created
,
global_sub_profs
,
final_sub_profs
,
cleanup_sub_profs
)
def
print_summary
(
self
,
stream
=
sys
.
stdout
,
level
=
0
,
depth
=-
1
):
name
=
getattr
(
self
,
'name'
,
None
)
...
...
@@ -2204,7 +2287,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
(
opt
,
loop_timing
,
loop_process_count
,
(
start_nb_nodes
,
end_nb_nodes
,
max_nb_nodes
),
global_opt_timing
,
nb_nodes
,
time_opts
,
io_toposort_timing
,
node_created
,
global_sub_profs
,
final_sub_profs
)
=
prof
node_created
,
global_sub_profs
,
final_sub_profs
,
cleanup_sub_profs
)
=
prof
blanc
=
(
' '
*
level
)
print
(
blanc
,
"EquilibriumOptimizer"
,
end
=
' '
,
file
=
stream
)
...
...
@@ -2222,6 +2306,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
print
(
blanc
,
" time in global optimizers
%.3
fs"
%
s
,
file
=
stream
)
s
=
sum
([
time_opts
[
o
]
for
o
in
opt
.
final_optimizers
])
print
(
blanc
,
" time in final optimizers
%.3
fs"
%
s
,
file
=
stream
)
s
=
sum
([
time_opts
[
o
]
for
o
in
opt
.
cleanup_optimizers
])
print
(
blanc
,
" time in cleanup optimizers
%.3
fs"
%
s
,
file
=
stream
)
for
i
in
range
(
len
(
loop_timing
)):
lopt
=
""
if
loop_process_count
[
i
]:
...
...
@@ -2245,7 +2331,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
process_count
=
{}
for
o
in
(
opt
.
global_optimizers
+
list
(
opt
.
get_local_optimizers
())
+
list
(
opt
.
final_optimizers
)):
list
(
opt
.
final_optimizers
)
+
list
(
opt
.
cleanup_optimizers
)):
process_count
.
setdefault
(
o
,
0
)
for
count
in
loop_process_count
:
for
o
,
v
in
iteritems
(
count
):
...
...
@@ -2275,12 +2362,13 @@ class EquilibriumOptimizer(NavigatorOptimizer):
print
(
blanc
+
" "
,
'
%.3
fs -
%
s'
%
(
t
,
o
),
file
=
stream
)
print
(
file
=
stream
)
gf_opts
=
[
o
for
o
in
(
opt
.
global_optimizers
+
list
(
opt
.
final_optimizers
))
list
(
opt
.
final_optimizers
)
+
list
(
opt
.
cleanup_optimizers
))
if
o
.
print_profile
.
func_code
is
not
Optimizer
.
print_profile
.
func_code
]
if
not
gf_opts
:
return
print
(
blanc
,
"Global
and final optimizer
"
,
file
=
stream
)
print
(
blanc
,
"Global
, final and clean up optimizers
"
,
file
=
stream
)
for
i
in
range
(
len
(
loop_timing
)):
print
(
blanc
,
"Iter
%
d"
%
i
,
file
=
stream
)
for
o
,
prof
in
zip
(
opt
.
global_optimizers
,
global_sub_profs
[
i
]):
...
...
@@ -2293,6 +2381,11 @@ class EquilibriumOptimizer(NavigatorOptimizer):
o
.
print_profile
(
stream
,
prof
,
level
+
2
)
except
NotImplementedError
:
print
(
blanc
,
"merge not implemented for "
,
o
)
for
o
,
prof
in
zip
(
opt
.
cleanup_optimizers
,
cleanup_sub_profs
[
i
]):
try
:
o
.
print_profile
(
stream
,
prof
,
level
+
2
)
except
NotImplementedError
:
print
(
blanc
,
"merge not implemented for "
,
o
)
@staticmethod
def
merge_profile
(
prof1
,
prof2
):
...
...
@@ -2307,10 +2400,16 @@ class EquilibriumOptimizer(NavigatorOptimizer):
prof2
[
0
]
.
final_optimizers
)
else
:
final_optimizers
=
None
if
len
(
prof1
[
0
]
.
cleanup_optimizers
)
>
0
or
len
(
prof2
[
0
]
.
cleanup_optimizers
)
>
0
:
cleanup_optimizers
=
OrderedSet
(
prof1
[
0
]
.
cleanup_optimizers
)
.
union
(
prof2
[
0
]
.
cleanup_optimizers
)
else
:
cleanup_optimizers
=
None
new_opt
=
EquilibriumOptimizer
(
local_optimizers
.
union
(
global_optimizers
),
max_use_ratio
=
1
,
final_optimizers
=
final_optimizers
)
final_optimizers
=
final_optimizers
,
cleanup_optimizers
=
cleanup_optimizers
)
def
merge_list
(
l1
,
l2
):
l
=
copy
.
copy
(
l1
)
...
...
@@ -2321,15 +2420,6 @@ class EquilibriumOptimizer(NavigatorOptimizer):
l
.
append
(
nb
)
return
l
def
merge_dict
(
d1
,
d2
):
d
=
d1
.
copy
()
for
k
,
v
in
iteritems
(
d2
):
if
k
in
d
:
d
[
k
]
+=
v
else
:
d
[
k
]
=
v
return
d
loop_timing
=
merge_list
(
prof1
[
1
],
prof2
[
1
])
loop_process_count
=
list
(
prof1
[
2
])
...
...
@@ -2358,6 +2448,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
node_created
=
merge_dict
(
prof1
[
8
],
prof2
[
8
])
global_sub_profs
=
merge_list
(
prof1
[
9
],
prof2
[
9
])
final_sub_profs
=
merge_list
(
prof1
[
10
],
prof2
[
10
])
# The cleanup sub-profiles are the entry that follows final_sub_profs in
# the profile tuple, i.e. index 11 (index 10 is final_sub_profs).
cleanup_sub_profs = merge_list(prof1[11], prof2[11])
return
(
new_opt
,
loop_timing
,
loop_process_count
,
...
...
@@ -2368,7 +2459,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
io_toposort_timing
,
node_created
,
global_sub_profs
,
final_sub_profs
)
final_sub_profs
,
cleanup_sub_profs
)
#################
# Utilities #
...
...
theano/gof/optdb.py
浏览文件 @
f0bd940e
...
...
@@ -268,28 +268,35 @@ class EquilibriumDB(DB):
super
(
EquilibriumDB
,
self
)
.
__init__
()
self
.
ignore_newtrees
=
ignore_newtrees
self
.
__final__
=
{}
self
.
__cleanup__
=
{}
def
register
(
self
,
name
,
obj
,
*
tags
,
**
kwtags
):
if
'final_opt'
in
kwtags
:
final_opt
=
kwtags
[
'final_opt'
]
kwtags
.
pop
(
'final_opt'
,
None
)
else
:
final_opt
=
False
final_opt
=
kwtags
.
pop
(
'final_opt'
,
False
)
cleanup
=
kwtags
.
pop
(
'cleanup'
,
False
)
# An opt should not be final and clean up
assert
not
(
final_opt
and
cleanup
)
super
(
EquilibriumDB
,
self
)
.
register
(
name
,
obj
,
*
tags
,
**
kwtags
)
self
.
__final__
[
name
]
=
final_opt
self
.
__cleanup__
[
name
]
=
cleanup
def
query
(
self
,
*
tags
,
**
kwtags
):
_opts
=
super
(
EquilibriumDB
,
self
)
.
query
(
*
tags
,
**
kwtags
)
final_opts
=
[
o
for
o
in
_opts
if
self
.
__final__
.
get
(
o
.
name
,
False
)]
opts
=
[
o
for
o
in
_opts
if
o
not
in
final_opts
]
cleanup_opts
=
[
o
for
o
in
_opts
if
self
.
__cleanup__
.
get
(
o
.
name
,
False
)]
opts
=
[
o
for
o
in
_opts
if
o
not
in
final_opts
and
o
not
in
cleanup_opts
]
if
len
(
final_opts
)
==
0
:
final_opts
=
None
if
len
(
cleanup_opts
)
==
0
:
cleanup_opts
=
None
return
opt
.
EquilibriumOptimizer
(
opts
,
max_use_ratio
=
config
.
optdb
.
max_use_ratio
,
ignore_newtrees
=
self
.
ignore_newtrees
,
failure_callback
=
opt
.
NavigatorOptimizer
.
warn_inplace
,
final_optimizers
=
final_opts
)
final_optimizers
=
final_opts
,
cleanup_optimizers
=
cleanup_opts
)
class
SequenceDB
(
DB
):
...
...
theano/sandbox/cuda/basic_ops.py
浏览文件 @
f0bd940e
...
...
@@ -3622,7 +3622,7 @@ class GpuAllocEmpty(GpuOp):
const_shp
=
tensor
.
get_scalar_constant_value
(
s
)
except
tensor
.
NotScalarConstantError
:
const_shp
=
None
bcast
.
append
(
numpy
.
all
(
1
==
const_shp
)
)
bcast
.
append
(
1
==
const_shp
)
otype
=
CudaNdarrayType
(
dtype
=
'float32'
,
broadcastable
=
bcast
)
output
=
otype
()
return
sh
,
output
...
...
theano/sandbox/cuda/cudnn_helper.h
浏览文件 @
f0bd940e
...
...
@@ -48,7 +48,7 @@ cudnnSetTensorNdDescriptor(
int
nbDims
,
const
int
dimA
[],
const
int
strideA
[])
{
if
(
n
d
Dims
!=
4
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
if
(
n
b
Dims
!=
4
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
return
cudnnSetTensor4dDescriptorEx
(
tensorDesc
,
dataType
,
dimA
[
0
],
dimA
[
1
],
dimA
[
2
],
dimA
[
3
],
...
...
@@ -204,7 +204,7 @@ cudnnSetPoolingNdDescriptor(
int
nbDims
,
const
int
windowDimA
[],
const
int
paddingA
[],
const
in
strideA
[])
{
const
in
t
strideA
[])
{
if
(
nbDims
!=
2
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
if
(
paddingA
[
0
]
!=
0
||
paddingA
[
1
]
!=
0
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
return
cudnnSetPoolingDescriptor
(
poolingDesc
,
mode
,
...
...
@@ -223,7 +223,7 @@ cudnnGetPoolingNdDescriptor(
int
strideA
[])
{
int
win0
,
win1
,
str0
,
str1
;
cudnnStatus_t
err
;
if
(
n
d
DimsRequested
<
2
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
if
(
n
b
DimsRequested
<
2
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
err
=
cudnnGetPoolingDescriptor
(
poolingDesc
,
mode
,
&
win0
,
&
win1
,
&
str0
,
&
str1
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
return
err
;
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
f0bd940e
...
...
@@ -1760,7 +1760,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
Subsampling stride (default: (1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
pad
(pad_h, pad_w) padding information.
(pad_h, pad_w) padding information.
pad_h is the number of zero-valued pixels added to each of the top and
bottom borders.
pad_w is the number of zero-valued pixels added to each of the left
...
...
theano/sandbox/cuda/opt.py
浏览文件 @
f0bd940e
...
...
@@ -104,7 +104,7 @@ optdb.register('gpu_after_fusion',
'gpu'
)
# Register merge_optimizer as a global opt
gpu_optimizer
.
register
(
'gpu_merge'
,
theano
.
gof
.
opt
.
merge_optimizer
,
gpu_optimizer
.
register
(
'gpu_merge'
,
theano
.
gof
.
opt
.
MergeOptimizer
()
,
'fast_run'
,
'fast_compile'
,
final_opt
=
True
)
...
...
theano/sandbox/cuda/type.py
浏览文件 @
f0bd940e
...
...
@@ -81,7 +81,7 @@ class CudaNdarrayType(Type):
raise
TypeError
(
'
%
s only supports dtype float32 for now. Tried '
'using dtype
%
s for variable
%
s'
%
(
self
.
__class__
.
__name__
,
dtype
,
name
))
self
.
broadcastable
=
tuple
(
broadcastable
)
self
.
broadcastable
=
tuple
(
b
ool
(
b
)
for
b
in
b
roadcastable
)
self
.
name
=
name
self
.
dtype_specs
()
# error checking is done there
...
...
theano/tensor/basic.py
浏览文件 @
f0bd940e
...
...
@@ -2673,7 +2673,7 @@ class Alloc(gof.Op):
const_shp
=
get_scalar_constant_value
(
s
)
except
NotScalarConstantError
:
const_shp
=
None
bcast
.
append
(
numpy
.
all
(
1
==
const_shp
)
)
bcast
.
append
(
1
==
const_shp
)
return
sh
,
bcast
def
make_node
(
self
,
value
,
*
shape
):
...
...
@@ -6037,7 +6037,7 @@ class AllocEmpty(gof.Op):
const_shp
=
get_scalar_constant_value
(
s
)
except
NotScalarConstantError
:
const_shp
=
None
bcast
.
append
(
numpy
.
all
(
1
==
const_shp
)
)
bcast
.
append
(
1
==
const_shp
)
otype
=
TensorType
(
dtype
=
self
.
dtype
,
broadcastable
=
bcast
)
output
=
otype
()
return
sh
,
output
...
...
theano/tensor/opt.py
浏览文件 @
f0bd940e
...
...
@@ -47,7 +47,6 @@ from theano.tensor.type import (values_eq_approx_remove_inf,
from
theano.gof.opt
import
(
Optimizer
,
pre_constant_merge
,
pre_greedy_local_optimizer
)
from
theano.gof.opt
import
merge_optimizer
from
theano.gof
import
toolbox
from
theano.tensor.basic
import
get_scalar_constant_value
,
ShapeError
,
NotScalarConstantError
from
six
import
StringIO
...
...
@@ -452,8 +451,9 @@ def register_canonicalize(lopt, *tags, **kwargs):
return
register_canonicalize
(
inner_lopt
,
lopt
,
*
tags
,
**
kwargs
)
return
register
else
:
name
=
(
kwargs
and
kwargs
.
pop
(
'name'
))
or
lopt
.
__name__
compile
.
optdb
[
'canonicalize'
]
.
register
(
name
,
lopt
,
'fast_run'
,
*
tags
)
name
=
kwargs
.
pop
(
'name'
,
None
)
or
lopt
.
__name__
compile
.
optdb
[
'canonicalize'
]
.
register
(
name
,
lopt
,
'fast_run'
,
*
tags
,
**
kwargs
)
return
lopt
...
...
@@ -463,8 +463,9 @@ def register_stabilize(lopt, *tags, **kwargs):
return
register_stabilize
(
inner_lopt
,
lopt
,
*
tags
,
**
kwargs
)
return
register
else
:
name
=
(
kwargs
and
kwargs
.
pop
(
'name'
))
or
lopt
.
__name__
compile
.
optdb
[
'stabilize'
]
.
register
(
name
,
lopt
,
'fast_run'
,
*
tags
)
name
=
kwargs
.
pop
(
'name'
,
None
)
or
lopt
.
__name__
compile
.
optdb
[
'stabilize'
]
.
register
(
name
,
lopt
,
'fast_run'
,
*
tags
,
**
kwargs
)
return
lopt
...
...
@@ -474,9 +475,9 @@ def register_specialize(lopt, *tags, **kwargs):
return
register_specialize
(
inner_lopt
,
lopt
,
*
tags
,
**
kwargs
)
return
register
else
:
name
=
(
kwargs
and
kwargs
.
pop
(
'name'
)
)
or
lopt
.
__name__
name
=
kwargs
.
pop
(
'name'
,
None
)
or
lopt
.
__name__
compile
.
optdb
[
'specialize'
]
.
register
(
name
,
lopt
,
'fast_run'
,
*
tags
)
*
tags
,
**
kwargs
)
return
lopt
...
...
@@ -502,11 +503,6 @@ def register_specialize_device(lopt, *tags, **kwargs):
return
lopt
# Register merge_optimizer as a global opt during canonicalize
compile
.
optdb
[
'canonicalize'
]
.
register
(
'canon_merge'
,
merge_optimizer
,
'fast_run'
,
final_opt
=
True
)
#####################
# Dot optimizations #
#####################
...
...
@@ -1414,6 +1410,172 @@ theano.compile.mode.optdb.register('ShapeOpt', ShapeOptimizer(),
0.1
,
'fast_run'
,
'fast_compile'
)
def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
    """
    Build the ``local_elemwise_alloc`` node optimization for a set of ops.

    Parameters
    ----------
    ElemwiseOP
        Class of the ops whose nodes are optimized.
    AllocOP
        Class of the alloc ops to remove from the inputs of those nodes.
    DimShuffleOP
        Class of the dimshuffle ops that may wrap an alloc.

    Returns
    -------
    function
        ``local_elemwise_alloc(node)``, which returns the list of new
        outputs for `node`, or False when nothing is optimized.

    """
    def dimshuffled_alloc(i):
        # True-ish when `i` is a DimShuffleOP applied on the output of an
        # AllocOP. The caller must have checked that `i.owner` is set.
        return (isinstance(i.owner.op, DimShuffleOP) and
                i.owner.inputs[0].owner and
                isinstance(i.owner.inputs[0].owner.op, AllocOP))

    def alloc_or_dimshuffled_alloc(i):
        # True-ish when `i` is produced by an AllocOP or by a DimShuffleOP
        # of an AllocOP.
        return i.owner and (isinstance(i.owner.op, AllocOP) or
                            dimshuffled_alloc(i))

    def local_elemwise_alloc(node):
        """
        elemwise(alloc(x, shp), ..., y.TensorType(BROADCAST CONDITION))
          -> elemwise(x, y.TensorType(BROADCAST CONDITION))

        elemwise(dimshuffle(alloc(x, shp)),... ,y.TensorType(BROADCAST CONDITION))
          -> elemwise(x.dimshuffle(...), y.TensorType(BROADCAST CONDITION))

        BROADCAST CONDITION: the condition is that the one input that is
        not optimized has the same broadcast pattern as the output.

        We can replace the alloc by a dimshuffle as the elemwise
        already has the shape info. The dimshuffle will be faster
        to execute.

        """
        if not isinstance(node.op, ElemwiseOP):
            return False

        out_bcast = node.outputs[0].type.broadcastable

        if len(node.outputs) > 1:
            # Ensure all outputs have the same broadcast pattern.
            # This is a supposition that I'm not sure is always true.
            assert all(o.type.broadcastable == out_bcast
                       for o in node.outputs[1:])

        # The broadcast pattern of the output must match the broadcast
        # pattern of at least one of the inputs.
        if not any(i.type.broadcastable == out_bcast for i in node.inputs):
            return False

        # At least one input must have an owner that is either an AllocOP
        # or a DimShuffleOP with an owner that is an AllocOP -- otherwise
        # there is nothing to optimize.
        if not any(alloc_or_dimshuffled_alloc(i) for i in node.inputs):
            return False

        # Search for an input that we can use as a baseline for the
        # dimensions. Prefer an input that is not an AllocOP nor a
        # DimShuffleOP of an AllocOP so that all allocs can be optimized.
        assert_op_idx = -1
        for idx, i in enumerate(node.inputs):
            if (i.type.broadcastable == out_bcast and
                    not alloc_or_dimshuffled_alloc(i)):
                assert_op_idx = idx
                break

        # It may be the case that only AllocOP and DimShuffleOP of AllocOP
        # exist.
        if assert_op_idx < 0:
            # We want to optimize as many allocs as possible. When
            # there is more than one, do all but one.
            nb_allocs = sum(1 for i in node.inputs
                            if alloc_or_dimshuffled_alloc(i))
            # If there is only 1 alloc or dimshuffled alloc, it is the one
            # we would use for the shape, so no alloc would be removed.
            if nb_allocs <= 1:
                # Nothing would be optimized!
                return False
            # At this point every input with the output broadcast pattern
            # is an alloc or a dimshuffled alloc, and at least one such
            # input exists (checked above). The first one is as good as
            # any to use.
            assert_op_idx = next(
                idx for idx, i in enumerate(node.inputs)
                if i.type.broadcastable == out_bcast)

        assert_op = node.inputs[assert_op_idx]
        cmp_op = assert_op
        new_i = []
        same_shape = node.fgraph.shape_feature.same_shape
        for i in node.inputs:
            # Remove alloc
            if (i.owner and isinstance(i.owner.op, AllocOP) and
                    i.owner.inputs[0].type != i.owner.outputs[0].type):
                # When i.owner.inputs[0].type == i.owner.outputs[0].type we
                # will remove that alloc later.
                assert i.type.ndim == cmp_op.type.ndim
                if (theano.config.experimental.local_alloc_elemwise_assert and
                        not same_shape(i, cmp_op)):
                    assert_op = assert_(assert_op,
                                        *[T.eq(i.shape[idx], cmp_op.shape[idx])
                                          for idx in xrange(i.type.ndim)
                                          if not i.type.broadcastable[idx]])
                new_i.append(i.owner.inputs[0])

            # Remove Alloc in DimShuffle
            elif i.owner and dimshuffled_alloc(i):
                assert i.type.ndim == cmp_op.type.ndim
                if theano.config.experimental.local_alloc_elemwise_assert:
                    assert_cond = [T.eq(i.shape[idx], cmp_op.shape[idx])
                                   for idx in xrange(i.type.ndim)
                                   if not i.type.broadcastable[idx] and
                                   not same_shape(i, cmp_op, idx, idx)]
                    if assert_cond:
                        assert_op = assert_(assert_op, *assert_cond)
                alloc_input = i.owner.inputs[0].owner.inputs[0]
                if alloc_input.ndim != i.owner.inputs[0].ndim:
                    # The alloc can add dimensions to the value.
                    # We add a dimshuffle to add them.
                    # We let later optimizations merge the multiple
                    # dimshuffles.
                    nb_dim_to_add = i.owner.inputs[0].ndim - alloc_input.ndim
                    alloc_input = alloc_input.dimshuffle(
                        ['x'] * nb_dim_to_add +
                        list(range(alloc_input.ndim)))

                # We need to keep the dimshuffle. It could swap axes or
                # add dimensions anywhere.
                r_i = i.owner.op(alloc_input)

                # Copy stack trace from i to new_i
                copy_stack_trace(i, r_i)
                new_i.append(r_i)
            else:
                new_i.append(i)
        new_i[assert_op_idx] = assert_op

        ret = node.op(*new_i, return_list=True)

        # Copy over stack trace from previous outputs to new outputs.
        copy_stack_trace(node.outputs, ret)
        return ret

    return local_elemwise_alloc
# TODO, global optimizer that lift the assert to the beginning of the graph.
# TODO, optimize all inputs when possible -- currently when all inputs have
# an alloc all but one is optimized.
local_elemwise_alloc
=
register_specialize
(
gof
.
local_optimizer
([
T
.
Elemwise
])(
local_elemwise_alloc_op
(
T
.
Elemwise
,
T
.
Alloc
,
T
.
DimShuffle
)),
'local_alloc_elemwise'
)
theano
.
configparser
.
AddConfigVar
(
'experimental.local_alloc_elemwise'
,
"DEPRECATED: If True, enable the experimental"
" optimization local_alloc_elemwise."
" Generates error if not True. Use"
" optimizer_excluding=local_alloc_elemwise"
" to dsiable."
,
theano
.
configparser
.
BoolParam
(
True
,
is_valid
=
lambda
x
:
x
),
in_c_key
=
False
)
# False could make the graph faster but not as safe.
theano
.
configparser
.
AddConfigVar
(
'experimental.local_alloc_elemwise_assert'
,
"When the local_alloc_elemwise is applied, add"
" an assert to highlight shape errors."
,
theano
.
configparser
.
BoolParam
(
True
),
in_c_key
=
False
)
@gof.local_optimizer
([
T
.
Elemwise
])
def
local_fill_sink
(
node
):
"""
...
...
@@ -1443,7 +1605,6 @@ def local_fill_sink(node):
# The newly created node c doesn't has 'clients',
# so this iteration is took place with node.outputs[0]
replacements
=
{
node
.
outputs
[
0
]:
c
}
all_clients_replaced
=
True
for
client
,
cl_idx
in
node
.
outputs
[
0
]
.
clients
:
if
(
hasattr
(
client
,
'op'
)
and
isinstance
(
client
.
op
,
T
.
Elemwise
)
and
...
...
@@ -1456,13 +1617,8 @@ def local_fill_sink(node):
new_client
.
owner
.
outputs
[
0
]
.
clients
=
client
.
outputs
[
0
]
.
clients
r
=
local_fill_sink
.
transform
(
new_client
.
owner
)
if
not
r
:
all_clients_replaced
=
False
continue
replacements
.
update
(
r
)
else
:
all_clients_replaced
=
False
if
all_clients_replaced
:
replacements
.
pop
(
node
.
outputs
[
0
],
None
)
return
replacements
register_canonicalize
(
local_fill_sink
)
...
...
@@ -1470,7 +1626,7 @@ register_canonicalize(local_fill_sink)
@register_specialize
@register_stabilize
@register_canonicalize
# @register_canonicalize # We make full pass after the canonizer phase.
@gof.local_optimizer
([
T
.
fill
])
def
local_fill_to_alloc
(
node
):
"""fill(s,v) -> alloc(v, shape(s))
...
...
@@ -1510,7 +1666,18 @@ def local_fill_to_alloc(node):
node
,)
# theano.printing.debugprint(node.outputs[0], file='str'))
return
rval
# Register this after stabilize (at 1.5) to make sure stabilize is not
# affected by a less canonicalized graph due to alloc.
compile
.
optdb
.
register
(
'local_fill_to_alloc'
,
in2out
(
local_fill_to_alloc
),
1.51
,
'fast_run'
)
# Needed to clean some extra alloc added by local_fill_to_alloc
compile
.
optdb
.
register
(
'local_elemwise_alloc'
,
in2out
(
local_elemwise_alloc
),
1.52
,
'fast_run'
)
@register_canonicalize
(
"fast_compile"
)
@gof.local_optimizer
([
T
.
fill
])
def
local_useless_fill
(
node
):
"""fill(s,v) -> v
...
...
@@ -1526,9 +1693,6 @@ def local_useless_fill(node):
# this is a useless fill, erase it.
# also, we don't need to copy over any stack traces here
return
[
v
]
compile
.
optdb
[
'canonicalize'
]
.
register
(
'local_useless_fill'
,
in2out
(
local_useless_fill
),
1.1
,
'fast_compile'
)
@register_specialize
...
...
@@ -2009,172 +2173,6 @@ compile.optdb['specialize'].register('local_remove_all_assert',
'unsafe'
,
use_db_name_as_tag
=
False
)
def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
    """
    Build the ``local_elemwise_alloc`` rewrite for one family of op classes.

    Parameters
    ----------
    ElemwiseOP
        Op class of the nodes the rewrite applies to; any other node is
        rejected.
    AllocOP
        Op class of the allocations removed from the elemwise inputs.
    DimShuffleOP
        Op class recognised as a dimshuffle applied to an AllocOP output.

    Returns
    -------
    function
        ``local_elemwise_alloc(node)``, which returns the list of new
        outputs for `node`, or False when nothing is rewritten.

    """
    def local_elemwise_alloc(node):
        """
        elemwise(alloc(x, shp), ..., y.TensorType(BROADCAST CONDITION))
          -> elemwise(x, y.TensorType(BROADCAST CONDITION))

        elemwise(dimshuffle(alloc(x, shp)),... ,y.TensorType(BROADCAST CONDITION))
          -> elemwise(x.dimshuffle(...), y.TensorType(BROADCAST CONDITION))

        BROADCAST CONDITION: the one input that is not optimized must
        have the same broadcast pattern as the output.

        We can replace the alloc by a dimshuffle as the elemwise
        already has the shape info. The dimshuffle will be faster
        to execute.

        """
        if not isinstance(node.op, ElemwiseOP):
            return False

        out_bcast = node.outputs[0].type.broadcastable

        if len(node.outputs) > 1:
            # Ensure all outputs have the same broadcast pattern
            # This is a supposition that I'm not sure is always true.
            assert all(o.type.broadcastable == out_bcast
                       for o in node.outputs[1:])

        # The broadcast pattern of the output must match the broadcast
        # pattern of at least one of the inputs.
        if not any(i.type.broadcastable == out_bcast for i in node.inputs):
            return False

        def dimshuffled_alloc(i):
            # Callers must have checked that `i.owner` is set.
            return (isinstance(i.owner.op, DimShuffleOP) and
                    i.owner.inputs[0].owner and
                    isinstance(i.owner.inputs[0].owner.op, AllocOP))

        def alloc_like(i):
            # Truthy when `i` is the output of an AllocOP, or of a
            # DimShuffleOP applied to the output of an AllocOP.
            return i.owner and (isinstance(i.owner.op, AllocOP) or
                                dimshuffled_alloc(i))

        # At least one input must have an owner that is either a AllocOP or a
        # DimShuffleOP with an owner that is a AllocOP -- otherwise there is
        # nothing to optimize.
        if not any(alloc_like(i) for i in node.inputs):
            return False

        # Search for input that we can use as a baseline for the dimensions.
        # Prefer an input that is not a AllocOP nor a DimShuffleOP of a
        # AllocOP so that all allocs can be optimized.
        assert_op_idx = -1
        for idx, i in enumerate(node.inputs):
            if i.type.broadcastable == out_bcast and not alloc_like(i):
                assert_op_idx = idx
                break

        # It may be the case that only AllocOP and DimShuffleOP of AllocOP exist.
        if assert_op_idx < 0:
            # We want to optimize as many allocs as possible. When
            # there is more than one then do all but one.
            alloc_inputs = [i for i in node.inputs if alloc_like(i)]
            # If only 1 alloc or dimshuffle alloc, it is the one we
            # will use for the shape. So no alloc would be removed.
            if len(alloc_inputs) <= 1:
                # Nothing would be optimized!
                return False
            # Every input matching the output pattern is an alloc or a
            # dimshuffle of an alloc here, and at least one such input
            # exists (checked above). The first one is as good as any.
            assert_op_idx = next(
                idx for idx, i in enumerate(node.inputs)
                if i.type.broadcastable == out_bcast)

        assert_op = node.inputs[assert_op_idx]
        cmp_op = assert_op
        new_i = []
        same_shape = node.fgraph.shape_feature.same_shape
        add_assert = theano.config.experimental.local_alloc_elemwise_assert
        for i in node.inputs:
            # Remove alloc
            if (i.owner and isinstance(i.owner.op, AllocOP) and
                    i.owner.inputs[0].type != i.owner.outputs[0].type):
                # when i.owner.inputs[0].type == i.owner.outputs[0].type we
                # will remove that alloc later
                assert i.type.ndim == cmp_op.type.ndim
                if add_assert and not same_shape(i, cmp_op):
                    assert_op = assert_(
                        assert_op,
                        *[T.eq(i.shape[idx], cmp_op.shape[idx])
                          for idx in xrange(i.type.ndim)
                          if not i.type.broadcastable[idx]])
                new_i.append(i.owner.inputs[0])

            # Remove Alloc in DimShuffle
            elif i.owner and dimshuffled_alloc(i):
                assert i.type.ndim == cmp_op.type.ndim
                if add_assert:
                    assert_cond = [T.eq(i.shape[idx], cmp_op.shape[idx])
                                   for idx in xrange(i.type.ndim)
                                   if not i.type.broadcastable[idx] and
                                   not same_shape(i, cmp_op, idx, idx)]
                    if assert_cond:
                        assert_op = assert_(assert_op, *assert_cond)
                alloc_input = i.owner.inputs[0].owner.inputs[0]
                if alloc_input.ndim != i.owner.inputs[0].ndim:
                    # The alloc can add dimension to the value
                    # We add a dimshuffle to add them.
                    # We let later optimization merge the multiple dimshuffle
                    nb_dim_to_add = i.owner.inputs[0].ndim - alloc_input.ndim
                    alloc_input = alloc_input.dimshuffle(
                        ['x'] * nb_dim_to_add +
                        list(range(alloc_input.ndim)))

                # We need to keep the dimshuffle. It could swap axes or
                # add dimensions anywhere.
                r_i = i.owner.op(alloc_input)

                # Copy stack trace from i to new_i
                copy_stack_trace(i, r_i)
                new_i.append(r_i)
            else:
                new_i.append(i)
        # The baseline input is kept (possibly wrapped in asserts), even if
        # the loop above stripped its alloc.
        new_i[assert_op_idx] = assert_op

        ret = node.op(*new_i, return_list=True)

        # Copy over stack trace from previous outputs to new outputs.
        copy_stack_trace(node.outputs, ret)
        return ret

    return local_elemwise_alloc
# TODO, global optimizer that lift the assert to the beginning of the graph.
# TODO, optimize all inputs when possible -- currently when all inputs have
#       an alloc all but one is optimized.

# Instantiate the rewrite for the T.Elemwise / T.Alloc / T.DimShuffle op
# classes, wrap it as a local optimizer tracking T.Elemwise and pass it to
# register_specialize. Note that the name given at registration is
# 'local_alloc_elemwise', which differs from the Python name
# `local_elemwise_alloc`.
local_elemwise_alloc = register_specialize(
    gof.local_optimizer([T.Elemwise])(
        local_elemwise_alloc_op(T.Elemwise, T.Alloc, T.DimShuffle)),
    'local_alloc_elemwise')

# Deprecated flag kept only so that existing configurations still parse.
# `is_valid` returns the value itself, so presumably only a truthy setting
# is accepted, matching the "Generates error if not True" help text.
# NOTE(review): the help string below spells "disable" as "dsiable"; it is
# runtime text and is left untouched here.
theano.configparser.AddConfigVar(
    'experimental.local_alloc_elemwise',
    "DEPRECATED: If True, enable the experimental"
    " optimization local_alloc_elemwise."
    " Generates error if not True. Use"
    " optimizer_excluding=local_alloc_elemwise"
    " to dsiable.",
    theano.configparser.BoolParam(
        True,
        is_valid=lambda x: x),
    in_c_key=False)

# False could make the graph faster but not as safe.
# This flag defaults to True; local_elemwise_alloc reads it to decide
# whether to insert shape-checking asserts.
theano.configparser.AddConfigVar(
    'experimental.local_alloc_elemwise_assert',
    "When the local_alloc_elemwise is applied, add"
    " an assert to highlight shape errors.",
    theano.configparser.BoolParam(True),
    in_c_key=False)
#######################
# Constant Canonicalization
############################
...
...
@@ -4018,7 +4016,9 @@ class Canonizer(gof.LocalOptimizer):
"""
if
isinstance
(
v
,
Variable
):
try
:
return
get_scalar_constant_value
(
v
)
# As the constant folding is in the canonicalize phase,
# We don't need to check all the graph each time.
return
get_scalar_constant_value
(
v
,
only_process_constants
=
True
)
except
NotScalarConstantError
:
return
None
else
:
...
...
@@ -5467,9 +5467,6 @@ def local_greedy_distributor(node):
return
[
rval
]
@register_canonicalize
(
'fast_compile'
)
@register_stabilize
(
'fast_compile'
)
@register_specialize
(
'fast_compile'
)
@gof.local_optimizer
(
None
)
def
constant_folding
(
node
):
for
input
in
node
.
inputs
:
...
...
@@ -5519,6 +5516,13 @@ def constant_folding(node):
return
rval
# Wrap the constant_folding local optimizer with in2out (new trees are
# ignored) under the name "topo_constant_folding", then register that single
# object with the canonicalize, stabilize and specialize registries, each
# time with the 'fast_compile' tag.
# NOTE(review): final_opt=True presumably schedules it as a final pass of
# each phase rather than inside the main loop -- confirm against the
# register_* helpers.
topo_constant_folding = in2out(constant_folding, ignore_newtrees=True,
                               name="topo_constant_folding")
register_canonicalize(topo_constant_folding, 'fast_compile', final_opt=True)
register_stabilize(topo_constant_folding, 'fast_compile', final_opt=True)
register_specialize(topo_constant_folding, 'fast_compile', final_opt=True)
def
_is_1
(
expr
):
"""
...
...
@@ -5758,7 +5762,7 @@ def local_log_erfc(node):
# sqrt(pi)*-x/(1-1/(2*x**2)+3/(4*x**4)-15/(8*x**6)))
# for float64: threshold=26.63 see at the end of the fct for the explanation
# for float32: threshold=9.3 see at the end of the fct for the explanation
# TODO: remove the constraint that there are only 2 inputs to
mul and
exp(x**2)
# TODO: remove the constraint that there are only 2 inputs to exp(x**2)
# is the second.
# TODO: at the test point 10 in float32, there is instability in the original
# value. The original gives -30.0, the stab -20.1 and in float64 -18.1.
...
...
@@ -5779,20 +5783,23 @@ def local_grad_log_erfc_neg(node):
# The mul is optional.
if
node
.
inputs
[
0
]
.
owner
.
op
!=
T
.
mul
:
mul
=
None
y
=
1
y
=
[]
if
not
node
.
inputs
[
0
]
.
owner
or
node
.
inputs
[
0
]
.
owner
.
op
!=
T
.
exp
:
return
False
exp
=
node
.
inputs
[
0
]
else
:
mul
=
node
.
inputs
[
0
]
if
mul
.
owner
.
inputs
[
0
]
.
owner
or
len
(
mul
.
owner
.
inputs
)
!=
2
:
return
False
y
=
mul
.
owner
.
inputs
[
0
]
if
(
not
mul
.
owner
.
inputs
[
1
]
.
owner
or
mul
.
owner
.
inputs
[
1
]
.
owner
.
op
!=
T
.
exp
):
return
False
exp
=
mul
.
owner
.
inputs
[
1
]
exp
=
None
for
idx
,
inp
in
enumerate
(
mul
.
owner
.
inputs
):
if
inp
.
owner
and
inp
.
owner
.
op
==
T
.
exp
:
exp
=
inp
break
if
len
(
mul
.
owner
.
inputs
)
==
2
:
y
=
[
mul
.
owner
.
inputs
[
1
-
idx
]]
else
:
y
=
mul
.
owner
.
inputs
[:]
del
y
[
idx
]
del
mul
if
not
exp
.
owner
.
inputs
[
0
]
.
owner
:
return
False
...
...
@@ -5894,9 +5901,10 @@ def local_grad_log_erfc_neg(node):
# threshold = 10.1
elif
x
.
dtype
==
'float64'
:
threshold
=
26.641747557
ret
=
T
.
switch
(
x
<
threshold
,
true_div_no_mul
,
stab_value
)
*
y
ret
=
T
.
switch
(
x
<
threshold
,
true_div_no_mul
,
stab_value
)
if
y
:
ret
=
T
.
mul
(
ret
,
*
y
)
ret
.
values_eq_approx
=
values_eq_approx_remove_inf_nan
return
[
ret
]
"""
The libm used for the test is amdlibm
...
...
theano/tensor/signal/downsample.py
浏览文件 @
f0bd940e
...
...
@@ -256,7 +256,10 @@ class DownsampleFactorMax(Op):
raise
TypeError
()
# TODO: consider restricting the dtype?
x
=
tensor
.
as_tensor_variable
(
x
)
return
gof
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
# If the input shape are broadcastable we can have 0 in the output shape
broad
=
x
.
broadcastable
[:
2
]
+
(
False
,
False
)
out
=
tensor
.
TensorType
(
x
.
dtype
,
broad
)
return
gof
.
Apply
(
self
,
[
x
],
[
out
()])
def
perform
(
self
,
node
,
inp
,
out
):
x
,
=
inp
...
...
theano/tensor/signal/tests/test_downsample.py
浏览文件 @
f0bd940e
...
...
@@ -801,6 +801,16 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
[
image_val
,
maxout_val
,
gz_val
],
MaxPoolGrad
,
warn
=
False
)
# checking with broadcastable input
image
=
tensor
.
tensor
(
dtype
=
'float64'
,
broadcastable
=
(
False
,
False
,
True
,
True
))
image_val
=
rng
.
rand
(
4
,
6
,
1
,
1
)
self
.
_compile_and_check
(
[
image
],
[
DownsampleFactorMax
((
2
,
2
),
ignore_border
=
True
,
padding
=
(
0
,
0
))(
image
)],
[
image_val
],
DownsampleFactorMax
)
def
test_opt_max_to_average
(
self
):
im
=
theano
.
tensor
.
tensor4
()
...
...
theano/tensor/tests/test_opt.py
浏览文件 @
f0bd940e
...
...
@@ -481,7 +481,7 @@ class test_canonize(unittest.TestCase):
mode
=
compile
.
mode
.
get_default_mode
()
opt
=
gof
.
Query
([
"canonicalize"
])
opt
=
opt
.
including
(
'ShapeOpt'
)
opt
=
opt
.
including
(
'ShapeOpt'
,
'local_fill_to_alloc'
)
opt
=
opt
.
excluding
(
'local_elemwise_fusion'
)
mode
=
mode
.
__class__
(
linker
=
mode
.
linker
,
optimizer
=
opt
)
...
...
@@ -4021,7 +4021,8 @@ class T_Rebroadcast(unittest.TestCase):
class
T_useless_elemwise
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
mode
=
theano
.
compile
.
get_default_mode
()
.
including
(
'canonicalize'
)
self
.
mode
=
theano
.
compile
.
get_default_mode
()
.
including
(
'canonicalize'
,
'local_fill_to_alloc'
)
def
test_eq
(
self
):
x
=
T
.
dmatrix
()
...
...
@@ -4545,7 +4546,7 @@ class T_local_erfc(unittest.TestCase):
# test that we work without the mul
f
=
theano
.
function
([
x
],
T
.
exp
(
T
.
neg
(
T
.
sqr
(
x
)))
/
T
.
erfc
(
x
),
mode
=
mode
)
assert
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
==
2
3
,
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
assert
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
==
2
2
,
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
assert
f
.
maker
.
fgraph
.
outputs
[
0
]
.
dtype
==
theano
.
config
.
floatX
assert
all
(
numpy
.
isfinite
(
f
(
val
)))
...
...
@@ -4558,7 +4559,7 @@ class T_local_erfc(unittest.TestCase):
# test that we work without the sqr and neg
f
=
theano
.
function
([
x
],
T
.
exp
(
T
.
mul
(
-
1
,
x
,
x
))
/
T
.
erfc
(
x
),
mode
=
mode
)
assert
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
==
2
2
,
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
assert
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
==
2
1
,
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
assert
f
.
maker
.
fgraph
.
outputs
[
0
]
.
dtype
==
theano
.
config
.
floatX
assert
all
(
numpy
.
isfinite
(
f
(
val
)))
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论