Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
f0bd940e
提交
f0bd940e
authored
10月 19, 2015
作者:
Pascal Lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3477 from nouiz/crash_gpu
Crash gpu and opt speed up
上级
dab522df
7fce44ca
隐藏空白字符变更
内嵌
并排
正在显示
15 个修改的文件
包含
403 行增加
和
273 行删除
+403
-273
optimization.txt
doc/extending/optimization.txt
+3
-3
mode.py
theano/compile/mode.py
+10
-1
cc.py
theano/gof/cc.py
+2
-2
opt.py
theano/gof/opt.py
+134
-42
optdb.py
theano/gof/optdb.py
+14
-7
basic_ops.py
theano/sandbox/cuda/basic_ops.py
+1
-1
cudnn_helper.h
theano/sandbox/cuda/cudnn_helper.h
+3
-3
dnn.py
theano/sandbox/cuda/dnn.py
+1
-1
opt.py
theano/sandbox/cuda/opt.py
+1
-1
type.py
theano/sandbox/cuda/type.py
+1
-1
basic.py
theano/tensor/basic.py
+2
-2
opt.py
theano/tensor/opt.py
+212
-204
downsample.py
theano/tensor/signal/downsample.py
+4
-1
test_downsample.py
theano/tensor/signal/tests/test_downsample.py
+10
-0
test_opt.py
theano/tensor/tests/test_opt.py
+5
-4
没有找到文件。
doc/extending/optimization.txt
浏览文件 @
f0bd940e
...
@@ -212,11 +212,11 @@ optimization you wrote. For example, consider the following:
...
@@ -212,11 +212,11 @@ optimization you wrote. For example, consider the following:
Nothing happened here. The reason is: ``add(y, z) != add(y,
Nothing happened here. The reason is: ``add(y, z) != add(y,
z)``. That is the case for efficiency reasons. To fix this problem we
z)``. That is the case for efficiency reasons. To fix this problem we
first need to merge the parts of the graph that represent the same
first need to merge the parts of the graph that represent the same
computation, using the ``
merge_o
ptimizer`` defined in
computation, using the ``
MergeO
ptimizer`` defined in
``theano.gof.opt``.
``theano.gof.opt``.
>>> from theano.gof.opt import
merge_o
ptimizer
>>> from theano.gof.opt import
MergeO
ptimizer
>>>
merge_optimizer
.optimize(e) # doctest: +ELLIPSIS
>>>
MergeOptimizer()
.optimize(e) # doctest: +ELLIPSIS
(0, ..., None, None, {}, 1, 0)
(0, ..., None, None, {}, 1, 0)
>>> e
>>> e
[true_div(mul(*1 -> add(y, z), x), *1)]
[true_div(mul(*1 -> add(y, z), x), *1)]
...
...
theano/compile/mode.py
浏览文件 @
f0bd940e
...
@@ -198,8 +198,17 @@ optdb.register('merge1', gof.MergeOptimizer(),
...
@@ -198,8 +198,17 @@ optdb.register('merge1', gof.MergeOptimizer(),
0
,
'fast_run'
,
'fast_compile'
,
'merge'
)
0
,
'fast_run'
,
'fast_compile'
,
'merge'
)
# rearranges elemwise expressions
# rearranges elemwise expressions
optdb
.
register
(
'canonicalize'
,
gof
.
EquilibriumDB
(),
optdb
.
register
(
'canonicalize'
,
gof
.
EquilibriumDB
(
ignore_newtrees
=
False
),
1
,
'fast_run'
,
'fast_compile'
)
1
,
'fast_run'
,
'fast_compile'
)
# Register the merge optimizer in the 'canonicalize' EquilibriumDB as a
# cleanup optimizer. Without this, because the equilibrium optimizer has
# ignore_newtrees=False, not all nodes would be merged if it were only
# registered as a global optimizer with final_opt=True.
# We use a new instance of MergeOptimizer so that its name is not
# changed by other uses of it.
optdb
[
'canonicalize'
]
.
register
(
"merge"
,
gof
.
opt
.
MergeOptimizer
(),
'fast_run'
,
"fast_compile"
,
cleanup
=
True
)
optdb
.
register
(
'merge1.2'
,
gof
.
MergeOptimizer
(),
optdb
.
register
(
'merge1.2'
,
gof
.
MergeOptimizer
(),
1.2
,
'fast_run'
,
'fast_compile'
,
'merge'
)
1.2
,
'fast_run'
,
'fast_compile'
,
'merge'
)
...
...
theano/gof/cc.py
浏览文件 @
f0bd940e
...
@@ -547,6 +547,7 @@ class CLinker(link.Linker):
...
@@ -547,6 +547,7 @@ class CLinker(link.Linker):
if
no_recycling
is
None
:
if
no_recycling
is
None
:
no_recycling
=
[]
no_recycling
=
[]
if
self
.
fgraph
is
not
None
and
self
.
fgraph
is
not
fgraph
:
if
self
.
fgraph
is
not
None
and
self
.
fgraph
is
not
fgraph
:
# A linker can be tied to only one FunctionGraph.
return
type
(
self
)(
self
.
schedule
)
.
accept
(
fgraph
,
no_recycling
)
return
type
(
self
)(
self
.
schedule
)
.
accept
(
fgraph
,
no_recycling
)
self
.
fgraph
=
fgraph
self
.
fgraph
=
fgraph
self
.
fetch_variables
()
self
.
fetch_variables
()
...
@@ -1750,14 +1751,13 @@ class OpWiseCLinker(link.LocalLinker):
...
@@ -1750,14 +1751,13 @@ class OpWiseCLinker(link.LocalLinker):
if
no_recycling
is
None
:
if
no_recycling
is
None
:
no_recycling
=
[]
no_recycling
=
[]
if
self
.
fgraph
is
not
None
and
self
.
fgraph
is
not
fgraph
:
if
self
.
fgraph
is
not
None
and
self
.
fgraph
is
not
fgraph
:
# A linker can be tied to only one FunctionGraph.
return
type
(
self
)(
return
type
(
self
)(
fallback_on_perform
=
self
.
fallback_on_perform
,
fallback_on_perform
=
self
.
fallback_on_perform
,
allow_gc
=
self
.
allow_gc
,
allow_gc
=
self
.
allow_gc
,
nice_errors
=
self
.
nice_errors
,
nice_errors
=
self
.
nice_errors
,
schedule
=
self
.
schedule
,
schedule
=
self
.
schedule
,
)
.
accept
(
fgraph
,
no_recycling
)
)
.
accept
(
fgraph
,
no_recycling
)
# raise Exception("Cannot accept from a Linker that is
# already tied to another FunctionGraph.")
self
.
fgraph
=
fgraph
self
.
fgraph
=
fgraph
self
.
no_recycling
=
no_recycling
self
.
no_recycling
=
no_recycling
return
self
return
self
...
...
theano/gof/opt.py
浏览文件 @
f0bd940e
...
@@ -873,8 +873,23 @@ class MergeOptimizer(Optimizer):
...
@@ -873,8 +873,23 @@ class MergeOptimizer(Optimizer):
if
i
[
1
]
>
0
:
if
i
[
1
]
>
0
:
print
(
i
)
print
(
i
)
@staticmethod
merge_optimizer
=
MergeOptimizer
()
def
merge_profile
(
prof1
,
prof2
):
def
merge_none_number
(
v1
,
v2
):
if
v1
is
None
:
return
v2
if
v2
is
None
:
return
v1
return
v1
+
v2
nb_fail
=
prof1
[
0
]
+
prof2
[
0
]
replace_time
=
prof1
[
1
]
+
prof2
[
1
]
validate_time
=
merge_none_number
(
prof1
[
2
],
prof2
[
2
])
callback_time
=
merge_none_number
(
prof1
[
3
],
prof2
[
3
])
callbacks_time
=
merge_dict
(
prof1
[
4
],
prof2
[
4
])
nb_merged
=
prof1
[
5
]
+
prof2
[
5
]
nb_constant
=
prof1
[
6
]
+
prof2
[
6
]
return
(
nb_fail
,
replace_time
,
validate_time
,
callback_time
,
callbacks_time
,
nb_merged
,
nb_constant
)
def
is_same_graph_with_merge
(
var1
,
var2
,
givens
=
None
):
def
is_same_graph_with_merge
(
var1
,
var2
,
givens
=
None
):
...
@@ -899,7 +914,7 @@ def is_same_graph_with_merge(var1, var2, givens=None):
...
@@ -899,7 +914,7 @@ def is_same_graph_with_merge(var1, var2, givens=None):
for
to_replace
,
replace_by
in
iteritems
(
givens
):
for
to_replace
,
replace_by
in
iteritems
(
givens
):
fgraph
.
replace
(
to_replace
,
replace_by
)
fgraph
.
replace
(
to_replace
,
replace_by
)
# Perform merge optimization.
# Perform merge optimization.
merge_optimizer
.
optimize
(
fgraph
)
MergeOptimizer
()
.
optimize
(
fgraph
)
# When two variables perform the same computations, they will have the same
# When two variables perform the same computations, they will have the same
# owner in the optimized graph.
# owner in the optimized graph.
# We need to be careful with the special case where the owner is None,
# We need to be careful with the special case where the owner is None,
...
@@ -1165,7 +1180,7 @@ class FromFunctionLocalOptimizer(LocalOptimizer):
...
@@ -1165,7 +1180,7 @@ class FromFunctionLocalOptimizer(LocalOptimizer):
id
(
self
)),
file
=
stream
)
id
(
self
)),
file
=
stream
)
def
local_optimizer
(
tracks
,
inplace
=
False
):
def
local_optimizer
(
tracks
,
inplace
=
False
,
requirements
=
()
):
def
decorator
(
f
):
def
decorator
(
f
):
"""
"""
WRITEME
WRITEME
...
@@ -1177,12 +1192,13 @@ def local_optimizer(tracks, inplace=False):
...
@@ -1177,12 +1192,13 @@ def local_optimizer(tracks, inplace=False):
for
t
in
tracks
:
for
t
in
tracks
:
if
not
(
isinstance
(
t
,
op
.
Op
)
or
issubclass
(
t
,
op
.
PureOp
)):
if
not
(
isinstance
(
t
,
op
.
Op
)
or
issubclass
(
t
,
op
.
PureOp
)):
raise
ValueError
(
"Tracks are op classes or instances"
,
f
.
__module__
,
f
.
__name__
)
raise
ValueError
(
"Tracks are op classes or instances"
,
f
.
__module__
,
f
.
__name__
)
req
uirements
=
()
req
=
requirements
if
inplace
:
if
inplace
:
dh_handler
=
dh
.
DestroyHandler
dh_handler
=
dh
.
DestroyHandler
requirements
=
(
lambda
fgraph
:
req
=
tuple
(
requirements
)
+
(
fgraph
.
attach_feature
(
dh_handler
()),)
lambda
fgraph
:
rval
=
FromFunctionLocalOptimizer
(
f
,
tracks
,
requirements
)
fgraph
.
attach_feature
(
dh_handler
()),)
rval
=
FromFunctionLocalOptimizer
(
f
,
tracks
,
req
)
rval
.
__name__
=
f
.
__name__
rval
.
__name__
=
f
.
__name__
return
rval
return
rval
return
decorator
return
decorator
...
@@ -1974,19 +1990,41 @@ class ChangeTracker:
...
@@ -1974,19 +1990,41 @@ class ChangeTracker:
fgraph
.
change_tracker
=
self
fgraph
.
change_tracker
=
self
def
merge_dict
(
d1
,
d2
):
"""
merge 2 dicts by adding the values.
"""
d
=
d1
.
copy
()
for
k
,
v
in
iteritems
(
d2
):
if
k
in
d
:
d
[
k
]
+=
v
else
:
d
[
k
]
=
v
return
d
class
EquilibriumOptimizer
(
NavigatorOptimizer
):
class
EquilibriumOptimizer
(
NavigatorOptimizer
):
"""
"""
Apply optimizations until equilibrium point.
Apply optimizations until equilibrium point.
Parameters
Parameters
----------
----------
optimizers
optimizers : list or set
List or set of local or global optimizations to apply until equilibrium.
Local or global optimizations to apply until equilibrium.
max_use_ratio
The global optimizer will be run at the start of each iteration before
the local optimizer.
max_use_ratio : int or float
Each optimizer can be applied at most (size of graph * this number)
Each optimizer can be applied at most (size of graph * this number)
times.
times.
ignore_newtrees
ignore_newtrees
See EquilibriumDB ignore_newtrees parameter definition.
See EquilibriumDB ignore_newtrees parameter definition.
final_optimizers
Global optimizers that will be run after each iteration.
cleanup_optimizers
Global optimizers that apply a list of predetermined optimizations.
They must not traverse the graph, as they are called very frequently.
The MergeOptimizer is one example of an optimization that respects this.
They are applied after all global optimizers, then each time a local
optimizer is applied, then after all final optimizers.
"""
"""
...
@@ -1995,7 +2033,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -1995,7 +2033,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
failure_callback
=
None
,
failure_callback
=
None
,
ignore_newtrees
=
True
,
ignore_newtrees
=
True
,
max_use_ratio
=
None
,
max_use_ratio
=
None
,
final_optimizers
=
None
):
final_optimizers
=
None
,
cleanup_optimizers
=
None
):
super
(
EquilibriumOptimizer
,
self
)
.
__init__
(
super
(
EquilibriumOptimizer
,
self
)
.
__init__
(
None
,
None
,
ignore_newtrees
=
ignore_newtrees
,
ignore_newtrees
=
ignore_newtrees
,
...
@@ -2004,6 +2043,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2004,6 +2043,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
self
.
local_optimizers_all
=
[]
self
.
local_optimizers_all
=
[]
self
.
global_optimizers
=
[]
self
.
global_optimizers
=
[]
self
.
final_optimizers
=
[]
self
.
final_optimizers
=
[]
self
.
cleanup_optimizers
=
[]
for
opt
in
optimizers
:
for
opt
in
optimizers
:
if
isinstance
(
opt
,
LocalOptimizer
):
if
isinstance
(
opt
,
LocalOptimizer
):
...
@@ -2016,6 +2056,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2016,6 +2056,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
self
.
global_optimizers
.
append
(
opt
)
self
.
global_optimizers
.
append
(
opt
)
if
final_optimizers
:
if
final_optimizers
:
self
.
final_optimizers
=
final_optimizers
self
.
final_optimizers
=
final_optimizers
if
cleanup_optimizers
:
self
.
cleanup_optimizers
=
cleanup_optimizers
self
.
max_use_ratio
=
max_use_ratio
self
.
max_use_ratio
=
max_use_ratio
assert
self
.
max_use_ratio
is
not
None
,
(
assert
self
.
max_use_ratio
is
not
None
,
(
'max_use_ratio has to be a number'
)
'max_use_ratio has to be a number'
)
...
@@ -2039,6 +2081,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2039,6 +2081,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
opt
.
add_requirements
(
fgraph
)
opt
.
add_requirements
(
fgraph
)
for
opt
in
self
.
final_optimizers
:
for
opt
in
self
.
final_optimizers
:
opt
.
add_requirements
(
fgraph
)
opt
.
add_requirements
(
fgraph
)
for
opt
in
self
.
cleanup_optimizers
:
opt
.
add_requirements
(
fgraph
)
def
apply
(
self
,
fgraph
,
start_from
=
None
):
def
apply
(
self
,
fgraph
,
start_from
=
None
):
change_tracker
=
ChangeTracker
()
change_tracker
=
ChangeTracker
()
...
@@ -2066,17 +2110,39 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2066,17 +2110,39 @@ class EquilibriumOptimizer(NavigatorOptimizer):
node_created
=
{}
node_created
=
{}
global_sub_profs
=
[]
global_sub_profs
=
[]
final_sub_profs
=
[]
final_sub_profs
=
[]
cleanup_sub_profs
=
[]
for
opt
in
(
self
.
global_optimizers
+
for
opt
in
(
self
.
global_optimizers
+
list
(
self
.
get_local_optimizers
())
+
list
(
self
.
get_local_optimizers
())
+
self
.
final_optimizers
):
self
.
final_optimizers
+
self
.
cleanup_optimizers
):
global_process_count
.
setdefault
(
opt
,
0
)
global_process_count
.
setdefault
(
opt
,
0
)
time_opts
.
setdefault
(
opt
,
0
)
time_opts
.
setdefault
(
opt
,
0
)
node_created
.
setdefault
(
opt
,
0
)
node_created
.
setdefault
(
opt
,
0
)
def
apply_cleanup
(
profs_dict
):
changed
=
False
for
copt
in
self
.
cleanup_optimizers
:
change_tracker
.
reset
()
nb
=
change_tracker
.
nb_imported
t_opt
=
time
.
time
()
sub_prof
=
copt
.
apply
(
fgraph
)
time_opts
[
copt
]
+=
time
.
time
()
-
t_opt
profs_dict
[
copt
]
.
append
(
sub_prof
)
if
change_tracker
.
changed
:
process_count
.
setdefault
(
copt
,
0
)
process_count
[
copt
]
+=
1
global_process_count
[
copt
]
+=
1
changed
=
True
node_created
[
copt
]
+=
change_tracker
.
nb_imported
-
nb
return
changed
while
changed
and
not
max_use_abort
:
while
changed
and
not
max_use_abort
:
process_count
=
{}
process_count
=
{}
t0
=
time
.
time
()
t0
=
time
.
time
()
changed
=
False
changed
=
False
iter_cleanup_sub_profs
=
{}
for
copt
in
self
.
cleanup_optimizers
:
iter_cleanup_sub_profs
[
copt
]
=
[]
# apply global optimizers
# apply global optimizers
sub_profs
=
[]
sub_profs
=
[]
...
@@ -2101,6 +2167,10 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2101,6 +2167,10 @@ class EquilibriumOptimizer(NavigatorOptimizer):
global_opt_timing
.
append
(
float
(
time
.
time
()
-
t0
))
global_opt_timing
.
append
(
float
(
time
.
time
()
-
t0
))
# Apply the cleanup optimizers, as the global optimizers may have made
# changes that require it.
changed
|=
apply_cleanup
(
iter_cleanup_sub_profs
)
# apply local optimizer
# apply local optimizer
topo_t0
=
time
.
time
()
topo_t0
=
time
.
time
()
q
=
deque
(
graph
.
io_toposort
(
fgraph
.
inputs
,
start_from
))
q
=
deque
(
graph
.
io_toposort
(
fgraph
.
inputs
,
start_from
))
...
@@ -2134,19 +2204,21 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2134,19 +2204,21 @@ class EquilibriumOptimizer(NavigatorOptimizer):
t_opt
=
time
.
time
()
t_opt
=
time
.
time
()
lopt_change
=
self
.
process_node
(
fgraph
,
node
,
lopt
)
lopt_change
=
self
.
process_node
(
fgraph
,
node
,
lopt
)
time_opts
[
lopt
]
+=
time
.
time
()
-
t_opt
time_opts
[
lopt
]
+=
time
.
time
()
-
t_opt
if
lopt_change
:
if
not
lopt_change
:
process_count
.
setdefault
(
lopt
,
0
)
continue
process_count
[
lopt
]
+=
1
process_count
.
setdefault
(
lopt
,
0
)
global_process_count
[
lopt
]
+=
1
process_count
[
lopt
]
+=
1
changed
=
True
global_process_count
[
lopt
]
+=
1
node_created
[
lopt
]
+=
change_tracker
.
nb_imported
-
nb
changed
=
True
if
global_process_count
[
lopt
]
>
max_use
:
node_created
[
lopt
]
+=
change_tracker
.
nb_imported
-
nb
max_use_abort
=
True
changed
|=
apply_cleanup
(
iter_cleanup_sub_profs
)
opt_name
=
(
getattr
(
lopt
,
"name"
,
None
)
or
if
global_process_count
[
lopt
]
>
max_use
:
getattr
(
lopt
,
"__name__"
,
""
))
max_use_abort
=
True
if
node
not
in
fgraph
.
apply_nodes
:
opt_name
=
(
getattr
(
lopt
,
"name"
,
None
)
or
# go to next node
getattr
(
lopt
,
"__name__"
,
""
))
break
if
node
not
in
fgraph
.
apply_nodes
:
# go to next node
break
finally
:
finally
:
self
.
detach_updater
(
fgraph
,
u
)
self
.
detach_updater
(
fgraph
,
u
)
...
@@ -2173,6 +2245,17 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2173,6 +2245,17 @@ class EquilibriumOptimizer(NavigatorOptimizer):
final_sub_profs
.
append
(
sub_profs
)
final_sub_profs
.
append
(
sub_profs
)
global_opt_timing
[
-
1
]
+=
time
.
time
()
-
t_before_final_opt
global_opt_timing
[
-
1
]
+=
time
.
time
()
-
t_before_final_opt
# Apply the cleanup optimizers, as the final optimizers may have made
# changes that require it.
changed
|=
apply_cleanup
(
iter_cleanup_sub_profs
)
# merge clean up profiles during that iteration.
c_sub_profs
=
[]
for
copt
,
sub_profs
in
iteritems
(
iter_cleanup_sub_profs
):
sub_prof
=
sub_profs
[
0
]
for
s_p
in
sub_profs
[
1
:]:
sub_prof
=
copt
.
merge_profile
(
sub_prof
,
s_p
)
c_sub_profs
.
append
(
sub_prof
)
cleanup_sub_profs
.
append
(
c_sub_profs
)
loop_process_count
.
append
(
process_count
)
loop_process_count
.
append
(
process_count
)
loop_timing
.
append
(
float
(
time
.
time
()
-
t0
))
loop_timing
.
append
(
float
(
time
.
time
()
-
t0
))
...
@@ -2188,7 +2271,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2188,7 +2271,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
return
(
self
,
loop_timing
,
loop_process_count
,
return
(
self
,
loop_timing
,
loop_process_count
,
(
start_nb_nodes
,
end_nb_nodes
,
max_nb_nodes
),
(
start_nb_nodes
,
end_nb_nodes
,
max_nb_nodes
),
global_opt_timing
,
nb_nodes
,
time_opts
,
io_toposort_timing
,
global_opt_timing
,
nb_nodes
,
time_opts
,
io_toposort_timing
,
node_created
,
global_sub_profs
,
final_sub_profs
)
node_created
,
global_sub_profs
,
final_sub_profs
,
cleanup_sub_profs
)
def
print_summary
(
self
,
stream
=
sys
.
stdout
,
level
=
0
,
depth
=-
1
):
def
print_summary
(
self
,
stream
=
sys
.
stdout
,
level
=
0
,
depth
=-
1
):
name
=
getattr
(
self
,
'name'
,
None
)
name
=
getattr
(
self
,
'name'
,
None
)
...
@@ -2204,7 +2287,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2204,7 +2287,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
(
opt
,
loop_timing
,
loop_process_count
,
(
opt
,
loop_timing
,
loop_process_count
,
(
start_nb_nodes
,
end_nb_nodes
,
max_nb_nodes
),
(
start_nb_nodes
,
end_nb_nodes
,
max_nb_nodes
),
global_opt_timing
,
nb_nodes
,
time_opts
,
io_toposort_timing
,
global_opt_timing
,
nb_nodes
,
time_opts
,
io_toposort_timing
,
node_created
,
global_sub_profs
,
final_sub_profs
)
=
prof
node_created
,
global_sub_profs
,
final_sub_profs
,
cleanup_sub_profs
)
=
prof
blanc
=
(
' '
*
level
)
blanc
=
(
' '
*
level
)
print
(
blanc
,
"EquilibriumOptimizer"
,
end
=
' '
,
file
=
stream
)
print
(
blanc
,
"EquilibriumOptimizer"
,
end
=
' '
,
file
=
stream
)
...
@@ -2222,6 +2306,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2222,6 +2306,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
print
(
blanc
,
" time in global optimizers
%.3
fs"
%
s
,
file
=
stream
)
print
(
blanc
,
" time in global optimizers
%.3
fs"
%
s
,
file
=
stream
)
s
=
sum
([
time_opts
[
o
]
for
o
in
opt
.
final_optimizers
])
s
=
sum
([
time_opts
[
o
]
for
o
in
opt
.
final_optimizers
])
print
(
blanc
,
" time in final optimizers
%.3
fs"
%
s
,
file
=
stream
)
print
(
blanc
,
" time in final optimizers
%.3
fs"
%
s
,
file
=
stream
)
s
=
sum
([
time_opts
[
o
]
for
o
in
opt
.
cleanup_optimizers
])
print
(
blanc
,
" time in cleanup optimizers
%.3
fs"
%
s
,
file
=
stream
)
for
i
in
range
(
len
(
loop_timing
)):
for
i
in
range
(
len
(
loop_timing
)):
lopt
=
""
lopt
=
""
if
loop_process_count
[
i
]:
if
loop_process_count
[
i
]:
...
@@ -2245,7 +2331,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2245,7 +2331,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
process_count
=
{}
process_count
=
{}
for
o
in
(
opt
.
global_optimizers
+
for
o
in
(
opt
.
global_optimizers
+
list
(
opt
.
get_local_optimizers
())
+
list
(
opt
.
get_local_optimizers
())
+
list
(
opt
.
final_optimizers
)):
list
(
opt
.
final_optimizers
)
+
list
(
opt
.
cleanup_optimizers
)):
process_count
.
setdefault
(
o
,
0
)
process_count
.
setdefault
(
o
,
0
)
for
count
in
loop_process_count
:
for
count
in
loop_process_count
:
for
o
,
v
in
iteritems
(
count
):
for
o
,
v
in
iteritems
(
count
):
...
@@ -2275,12 +2362,13 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2275,12 +2362,13 @@ class EquilibriumOptimizer(NavigatorOptimizer):
print
(
blanc
+
" "
,
'
%.3
fs -
%
s'
%
(
t
,
o
),
file
=
stream
)
print
(
blanc
+
" "
,
'
%.3
fs -
%
s'
%
(
t
,
o
),
file
=
stream
)
print
(
file
=
stream
)
print
(
file
=
stream
)
gf_opts
=
[
o
for
o
in
(
opt
.
global_optimizers
+
gf_opts
=
[
o
for
o
in
(
opt
.
global_optimizers
+
list
(
opt
.
final_optimizers
))
list
(
opt
.
final_optimizers
)
+
list
(
opt
.
cleanup_optimizers
))
if
o
.
print_profile
.
func_code
is
not
if
o
.
print_profile
.
func_code
is
not
Optimizer
.
print_profile
.
func_code
]
Optimizer
.
print_profile
.
func_code
]
if
not
gf_opts
:
if
not
gf_opts
:
return
return
print
(
blanc
,
"Global
and final optimizer
"
,
file
=
stream
)
print
(
blanc
,
"Global
, final and clean up optimizers
"
,
file
=
stream
)
for
i
in
range
(
len
(
loop_timing
)):
for
i
in
range
(
len
(
loop_timing
)):
print
(
blanc
,
"Iter
%
d"
%
i
,
file
=
stream
)
print
(
blanc
,
"Iter
%
d"
%
i
,
file
=
stream
)
for
o
,
prof
in
zip
(
opt
.
global_optimizers
,
global_sub_profs
[
i
]):
for
o
,
prof
in
zip
(
opt
.
global_optimizers
,
global_sub_profs
[
i
]):
...
@@ -2293,6 +2381,11 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2293,6 +2381,11 @@ class EquilibriumOptimizer(NavigatorOptimizer):
o
.
print_profile
(
stream
,
prof
,
level
+
2
)
o
.
print_profile
(
stream
,
prof
,
level
+
2
)
except
NotImplementedError
:
except
NotImplementedError
:
print
(
blanc
,
"merge not implemented for "
,
o
)
print
(
blanc
,
"merge not implemented for "
,
o
)
for
o
,
prof
in
zip
(
opt
.
cleanup_optimizers
,
cleanup_sub_profs
[
i
]):
try
:
o
.
print_profile
(
stream
,
prof
,
level
+
2
)
except
NotImplementedError
:
print
(
blanc
,
"merge not implemented for "
,
o
)
@staticmethod
@staticmethod
def
merge_profile
(
prof1
,
prof2
):
def
merge_profile
(
prof1
,
prof2
):
...
@@ -2307,10 +2400,16 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2307,10 +2400,16 @@ class EquilibriumOptimizer(NavigatorOptimizer):
prof2
[
0
]
.
final_optimizers
)
prof2
[
0
]
.
final_optimizers
)
else
:
else
:
final_optimizers
=
None
final_optimizers
=
None
if
len
(
prof1
[
0
]
.
cleanup_optimizers
)
>
0
or
len
(
prof2
[
0
]
.
cleanup_optimizers
)
>
0
:
cleanup_optimizers
=
OrderedSet
(
prof1
[
0
]
.
cleanup_optimizers
)
.
union
(
prof2
[
0
]
.
cleanup_optimizers
)
else
:
cleanup_optimizers
=
None
new_opt
=
EquilibriumOptimizer
(
new_opt
=
EquilibriumOptimizer
(
local_optimizers
.
union
(
global_optimizers
),
local_optimizers
.
union
(
global_optimizers
),
max_use_ratio
=
1
,
max_use_ratio
=
1
,
final_optimizers
=
final_optimizers
)
final_optimizers
=
final_optimizers
,
cleanup_optimizers
=
cleanup_optimizers
)
def
merge_list
(
l1
,
l2
):
def
merge_list
(
l1
,
l2
):
l
=
copy
.
copy
(
l1
)
l
=
copy
.
copy
(
l1
)
...
@@ -2321,15 +2420,6 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2321,15 +2420,6 @@ class EquilibriumOptimizer(NavigatorOptimizer):
l
.
append
(
nb
)
l
.
append
(
nb
)
return
l
return
l
def
merge_dict
(
d1
,
d2
):
d
=
d1
.
copy
()
for
k
,
v
in
iteritems
(
d2
):
if
k
in
d
:
d
[
k
]
+=
v
else
:
d
[
k
]
=
v
return
d
loop_timing
=
merge_list
(
prof1
[
1
],
prof2
[
1
])
loop_timing
=
merge_list
(
prof1
[
1
],
prof2
[
1
])
loop_process_count
=
list
(
prof1
[
2
])
loop_process_count
=
list
(
prof1
[
2
])
...
@@ -2358,6 +2448,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2358,6 +2448,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
node_created
=
merge_dict
(
prof1
[
8
],
prof2
[
8
])
node_created
=
merge_dict
(
prof1
[
8
],
prof2
[
8
])
global_sub_profs
=
merge_list
(
prof1
[
9
],
prof2
[
9
])
global_sub_profs
=
merge_list
(
prof1
[
9
],
prof2
[
9
])
final_sub_profs
=
merge_list
(
prof1
[
10
],
prof2
[
10
])
final_sub_profs
=
merge_list
(
prof1
[
10
],
prof2
[
10
])
cleanup_sub_profs
=
merge_list
(
prof1
[
10
],
prof2
[
10
])
return
(
new_opt
,
return
(
new_opt
,
loop_timing
,
loop_timing
,
loop_process_count
,
loop_process_count
,
...
@@ -2368,7 +2459,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -2368,7 +2459,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
io_toposort_timing
,
io_toposort_timing
,
node_created
,
node_created
,
global_sub_profs
,
global_sub_profs
,
final_sub_profs
)
final_sub_profs
,
cleanup_sub_profs
)
#################
#################
# Utilities #
# Utilities #
...
...
theano/gof/optdb.py
浏览文件 @
f0bd940e
...
@@ -268,28 +268,35 @@ class EquilibriumDB(DB):
...
@@ -268,28 +268,35 @@ class EquilibriumDB(DB):
super
(
EquilibriumDB
,
self
)
.
__init__
()
super
(
EquilibriumDB
,
self
)
.
__init__
()
self
.
ignore_newtrees
=
ignore_newtrees
self
.
ignore_newtrees
=
ignore_newtrees
self
.
__final__
=
{}
self
.
__final__
=
{}
self
.
__cleanup__
=
{}
def
register
(
self
,
name
,
obj
,
*
tags
,
**
kwtags
):
def
register
(
self
,
name
,
obj
,
*
tags
,
**
kwtags
):
if
'final_opt'
in
kwtags
:
final_opt
=
kwtags
.
pop
(
'final_opt'
,
False
)
final_opt
=
kwtags
[
'final_opt'
]
cleanup
=
kwtags
.
pop
(
'cleanup'
,
False
)
kwtags
.
pop
(
'final_opt'
,
None
)
# An optimizer must not be registered as both final and cleanup.
else
:
assert
not
(
final_opt
and
cleanup
)
final_opt
=
False
super
(
EquilibriumDB
,
self
)
.
register
(
name
,
obj
,
*
tags
,
**
kwtags
)
super
(
EquilibriumDB
,
self
)
.
register
(
name
,
obj
,
*
tags
,
**
kwtags
)
self
.
__final__
[
name
]
=
final_opt
self
.
__final__
[
name
]
=
final_opt
self
.
__cleanup__
[
name
]
=
cleanup
def
query
(
self
,
*
tags
,
**
kwtags
):
def
query
(
self
,
*
tags
,
**
kwtags
):
_opts
=
super
(
EquilibriumDB
,
self
)
.
query
(
*
tags
,
**
kwtags
)
_opts
=
super
(
EquilibriumDB
,
self
)
.
query
(
*
tags
,
**
kwtags
)
final_opts
=
[
o
for
o
in
_opts
if
self
.
__final__
.
get
(
o
.
name
,
False
)]
final_opts
=
[
o
for
o
in
_opts
if
self
.
__final__
.
get
(
o
.
name
,
False
)]
opts
=
[
o
for
o
in
_opts
if
o
not
in
final_opts
]
cleanup_opts
=
[
o
for
o
in
_opts
if
self
.
__cleanup__
.
get
(
o
.
name
,
False
)]
opts
=
[
o
for
o
in
_opts
if
o
not
in
final_opts
and
o
not
in
cleanup_opts
]
if
len
(
final_opts
)
==
0
:
if
len
(
final_opts
)
==
0
:
final_opts
=
None
final_opts
=
None
if
len
(
cleanup_opts
)
==
0
:
cleanup_opts
=
None
return
opt
.
EquilibriumOptimizer
(
return
opt
.
EquilibriumOptimizer
(
opts
,
opts
,
max_use_ratio
=
config
.
optdb
.
max_use_ratio
,
max_use_ratio
=
config
.
optdb
.
max_use_ratio
,
ignore_newtrees
=
self
.
ignore_newtrees
,
ignore_newtrees
=
self
.
ignore_newtrees
,
failure_callback
=
opt
.
NavigatorOptimizer
.
warn_inplace
,
failure_callback
=
opt
.
NavigatorOptimizer
.
warn_inplace
,
final_optimizers
=
final_opts
)
final_optimizers
=
final_opts
,
cleanup_optimizers
=
cleanup_opts
)
class
SequenceDB
(
DB
):
class
SequenceDB
(
DB
):
...
...
theano/sandbox/cuda/basic_ops.py
浏览文件 @
f0bd940e
...
@@ -3622,7 +3622,7 @@ class GpuAllocEmpty(GpuOp):
...
@@ -3622,7 +3622,7 @@ class GpuAllocEmpty(GpuOp):
const_shp
=
tensor
.
get_scalar_constant_value
(
s
)
const_shp
=
tensor
.
get_scalar_constant_value
(
s
)
except
tensor
.
NotScalarConstantError
:
except
tensor
.
NotScalarConstantError
:
const_shp
=
None
const_shp
=
None
bcast
.
append
(
numpy
.
all
(
1
==
const_shp
)
)
bcast
.
append
(
1
==
const_shp
)
otype
=
CudaNdarrayType
(
dtype
=
'float32'
,
broadcastable
=
bcast
)
otype
=
CudaNdarrayType
(
dtype
=
'float32'
,
broadcastable
=
bcast
)
output
=
otype
()
output
=
otype
()
return
sh
,
output
return
sh
,
output
...
...
theano/sandbox/cuda/cudnn_helper.h
浏览文件 @
f0bd940e
...
@@ -48,7 +48,7 @@ cudnnSetTensorNdDescriptor(
...
@@ -48,7 +48,7 @@ cudnnSetTensorNdDescriptor(
int
nbDims
,
int
nbDims
,
const
int
dimA
[],
const
int
dimA
[],
const
int
strideA
[])
{
const
int
strideA
[])
{
if
(
n
d
Dims
!=
4
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
if
(
n
b
Dims
!=
4
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
return
cudnnSetTensor4dDescriptorEx
(
return
cudnnSetTensor4dDescriptorEx
(
tensorDesc
,
dataType
,
tensorDesc
,
dataType
,
dimA
[
0
],
dimA
[
1
],
dimA
[
2
],
dimA
[
3
],
dimA
[
0
],
dimA
[
1
],
dimA
[
2
],
dimA
[
3
],
...
@@ -204,7 +204,7 @@ cudnnSetPoolingNdDescriptor(
...
@@ -204,7 +204,7 @@ cudnnSetPoolingNdDescriptor(
int
nbDims
,
int
nbDims
,
const
int
windowDimA
[],
const
int
windowDimA
[],
const
int
paddingA
[],
const
int
paddingA
[],
const
in
strideA
[])
{
const
in
t
strideA
[])
{
if
(
nbDims
!=
2
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
if
(
nbDims
!=
2
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
if
(
paddingA
[
0
]
!=
0
||
paddingA
[
1
]
!=
0
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
if
(
paddingA
[
0
]
!=
0
||
paddingA
[
1
]
!=
0
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
return
cudnnSetPoolingDescriptor
(
poolingDesc
,
mode
,
return
cudnnSetPoolingDescriptor
(
poolingDesc
,
mode
,
...
@@ -223,7 +223,7 @@ cudnnGetPoolingNdDescriptor(
...
@@ -223,7 +223,7 @@ cudnnGetPoolingNdDescriptor(
int
strideA
[])
{
int
strideA
[])
{
int
win0
,
win1
,
str0
,
str1
;
int
win0
,
win1
,
str0
,
str1
;
cudnnStatus_t
err
;
cudnnStatus_t
err
;
if
(
n
d
DimsRequested
<
2
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
if
(
n
b
DimsRequested
<
2
)
return
CUDNN_STATUS_NOT_SUPPORTED
;
err
=
cudnnGetPoolingDescriptor
(
poolingDesc
,
mode
,
&
win0
,
&
win1
,
err
=
cudnnGetPoolingDescriptor
(
poolingDesc
,
mode
,
&
win0
,
&
win1
,
&
str0
,
&
str1
);
&
str0
,
&
str1
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
return
err
;
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
return
err
;
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
f0bd940e
...
@@ -1760,7 +1760,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
...
@@ -1760,7 +1760,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
Subsampling stride (default: (1, 1)).
Subsampling stride (default: (1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
pad
pad
(pad_h, pad_w) padding information.
(pad_h, pad_w) padding information.
pad_h is the number of zero-valued pixels added to each of the top and
pad_h is the number of zero-valued pixels added to each of the top and
bottom borders.
bottom borders.
pad_w is the number of zero-valued pixels added to each of the left
pad_w is the number of zero-valued pixels added to each of the left
...
...
theano/sandbox/cuda/opt.py
浏览文件 @
f0bd940e
...
@@ -104,7 +104,7 @@ optdb.register('gpu_after_fusion',
...
@@ -104,7 +104,7 @@ optdb.register('gpu_after_fusion',
'gpu'
)
'gpu'
)
# Register merge_optimizer as a global opt
# Register merge_optimizer as a global opt
gpu_optimizer
.
register
(
'gpu_merge'
,
theano
.
gof
.
opt
.
merge_optimizer
,
gpu_optimizer
.
register
(
'gpu_merge'
,
theano
.
gof
.
opt
.
MergeOptimizer
()
,
'fast_run'
,
'fast_compile'
,
final_opt
=
True
)
'fast_run'
,
'fast_compile'
,
final_opt
=
True
)
...
...
theano/sandbox/cuda/type.py
浏览文件 @
f0bd940e
...
@@ -81,7 +81,7 @@ class CudaNdarrayType(Type):
...
@@ -81,7 +81,7 @@ class CudaNdarrayType(Type):
raise
TypeError
(
'
%
s only supports dtype float32 for now. Tried '
raise
TypeError
(
'
%
s only supports dtype float32 for now. Tried '
'using dtype
%
s for variable
%
s'
%
'using dtype
%
s for variable
%
s'
%
(
self
.
__class__
.
__name__
,
dtype
,
name
))
(
self
.
__class__
.
__name__
,
dtype
,
name
))
self
.
broadcastable
=
tuple
(
broadcastable
)
self
.
broadcastable
=
tuple
(
b
ool
(
b
)
for
b
in
b
roadcastable
)
self
.
name
=
name
self
.
name
=
name
self
.
dtype_specs
()
# error checking is done there
self
.
dtype_specs
()
# error checking is done there
...
...
theano/tensor/basic.py
浏览文件 @
f0bd940e
...
@@ -2673,7 +2673,7 @@ class Alloc(gof.Op):
...
@@ -2673,7 +2673,7 @@ class Alloc(gof.Op):
const_shp
=
get_scalar_constant_value
(
s
)
const_shp
=
get_scalar_constant_value
(
s
)
except
NotScalarConstantError
:
except
NotScalarConstantError
:
const_shp
=
None
const_shp
=
None
bcast
.
append
(
numpy
.
all
(
1
==
const_shp
)
)
bcast
.
append
(
1
==
const_shp
)
return
sh
,
bcast
return
sh
,
bcast
def
make_node
(
self
,
value
,
*
shape
):
def
make_node
(
self
,
value
,
*
shape
):
...
@@ -6037,7 +6037,7 @@ class AllocEmpty(gof.Op):
...
@@ -6037,7 +6037,7 @@ class AllocEmpty(gof.Op):
const_shp
=
get_scalar_constant_value
(
s
)
const_shp
=
get_scalar_constant_value
(
s
)
except
NotScalarConstantError
:
except
NotScalarConstantError
:
const_shp
=
None
const_shp
=
None
bcast
.
append
(
numpy
.
all
(
1
==
const_shp
)
)
bcast
.
append
(
1
==
const_shp
)
otype
=
TensorType
(
dtype
=
self
.
dtype
,
broadcastable
=
bcast
)
otype
=
TensorType
(
dtype
=
self
.
dtype
,
broadcastable
=
bcast
)
output
=
otype
()
output
=
otype
()
return
sh
,
output
return
sh
,
output
...
...
theano/tensor/opt.py
浏览文件 @
f0bd940e
...
@@ -47,7 +47,6 @@ from theano.tensor.type import (values_eq_approx_remove_inf,
...
@@ -47,7 +47,6 @@ from theano.tensor.type import (values_eq_approx_remove_inf,
from
theano.gof.opt
import
(
Optimizer
,
pre_constant_merge
,
from
theano.gof.opt
import
(
Optimizer
,
pre_constant_merge
,
pre_greedy_local_optimizer
)
pre_greedy_local_optimizer
)
from
theano.gof.opt
import
merge_optimizer
from
theano.gof
import
toolbox
from
theano.gof
import
toolbox
from
theano.tensor.basic
import
get_scalar_constant_value
,
ShapeError
,
NotScalarConstantError
from
theano.tensor.basic
import
get_scalar_constant_value
,
ShapeError
,
NotScalarConstantError
from
six
import
StringIO
from
six
import
StringIO
...
@@ -452,8 +451,9 @@ def register_canonicalize(lopt, *tags, **kwargs):
...
@@ -452,8 +451,9 @@ def register_canonicalize(lopt, *tags, **kwargs):
return
register_canonicalize
(
inner_lopt
,
lopt
,
*
tags
,
**
kwargs
)
return
register_canonicalize
(
inner_lopt
,
lopt
,
*
tags
,
**
kwargs
)
return
register
return
register
else
:
else
:
name
=
(
kwargs
and
kwargs
.
pop
(
'name'
))
or
lopt
.
__name__
name
=
kwargs
.
pop
(
'name'
,
None
)
or
lopt
.
__name__
compile
.
optdb
[
'canonicalize'
]
.
register
(
name
,
lopt
,
'fast_run'
,
*
tags
)
compile
.
optdb
[
'canonicalize'
]
.
register
(
name
,
lopt
,
'fast_run'
,
*
tags
,
**
kwargs
)
return
lopt
return
lopt
...
@@ -463,8 +463,9 @@ def register_stabilize(lopt, *tags, **kwargs):
...
@@ -463,8 +463,9 @@ def register_stabilize(lopt, *tags, **kwargs):
return
register_stabilize
(
inner_lopt
,
lopt
,
*
tags
,
**
kwargs
)
return
register_stabilize
(
inner_lopt
,
lopt
,
*
tags
,
**
kwargs
)
return
register
return
register
else
:
else
:
name
=
(
kwargs
and
kwargs
.
pop
(
'name'
))
or
lopt
.
__name__
name
=
kwargs
.
pop
(
'name'
,
None
)
or
lopt
.
__name__
compile
.
optdb
[
'stabilize'
]
.
register
(
name
,
lopt
,
'fast_run'
,
*
tags
)
compile
.
optdb
[
'stabilize'
]
.
register
(
name
,
lopt
,
'fast_run'
,
*
tags
,
**
kwargs
)
return
lopt
return
lopt
...
@@ -474,9 +475,9 @@ def register_specialize(lopt, *tags, **kwargs):
...
@@ -474,9 +475,9 @@ def register_specialize(lopt, *tags, **kwargs):
return
register_specialize
(
inner_lopt
,
lopt
,
*
tags
,
**
kwargs
)
return
register_specialize
(
inner_lopt
,
lopt
,
*
tags
,
**
kwargs
)
return
register
return
register
else
:
else
:
name
=
(
kwargs
and
kwargs
.
pop
(
'name'
)
)
or
lopt
.
__name__
name
=
kwargs
.
pop
(
'name'
,
None
)
or
lopt
.
__name__
compile
.
optdb
[
'specialize'
]
.
register
(
name
,
lopt
,
'fast_run'
,
compile
.
optdb
[
'specialize'
]
.
register
(
name
,
lopt
,
'fast_run'
,
*
tags
)
*
tags
,
**
kwargs
)
return
lopt
return
lopt
...
@@ -502,11 +503,6 @@ def register_specialize_device(lopt, *tags, **kwargs):
...
@@ -502,11 +503,6 @@ def register_specialize_device(lopt, *tags, **kwargs):
return
lopt
return
lopt
# Register merge_optimizer as a global opt during canonicalize
compile
.
optdb
[
'canonicalize'
]
.
register
(
'canon_merge'
,
merge_optimizer
,
'fast_run'
,
final_opt
=
True
)
#####################
#####################
# Dot optimizations #
# Dot optimizations #
#####################
#####################
...
@@ -1414,6 +1410,172 @@ theano.compile.mode.optdb.register('ShapeOpt', ShapeOptimizer(),
...
@@ -1414,6 +1410,172 @@ theano.compile.mode.optdb.register('ShapeOpt', ShapeOptimizer(),
0.1
,
'fast_run'
,
'fast_compile'
)
0.1
,
'fast_run'
,
'fast_compile'
)
def
local_elemwise_alloc_op
(
ElemwiseOP
,
AllocOP
,
DimShuffleOP
):
def
local_elemwise_alloc
(
node
):
"""
elemwise(alloc(x, shp), ..., y.TensorType(BROADCAST CONDITION))
-> elemwise(x, y.TensorType(BROADCAST CONDITION))
elemwise(dimshuffle(alloc(x, shp)),... ,y.TensorType(BROADCAST CONDITION))
-> elemwise(x.dimshuffle(...), y.TensorType(BROADCAST CONDITION))
BROADCAST CONDITION: the condition is that the one input that are
not to be optimized to have the same broadcast pattern as the
output.
We can change the alloc by a dimshuffle as the elemwise
already have the shape info. The dimshuffle will be faster
to exec.
"""
if
not
isinstance
(
node
.
op
,
ElemwiseOP
):
return
False
if
len
(
node
.
outputs
)
>
1
:
# Ensure all outputs have the same broadcast pattern
# This is a supposition that I'm not sure is always true.
assert
all
([
o
.
type
.
broadcastable
==
node
.
outputs
[
0
]
.
type
.
broadcastable
for
o
in
node
.
outputs
[
1
:]])
# The broadcast pattern of the ouptut must match the broadcast
# pattern of at least one of the inputs.
if
not
any
([
i
.
type
.
broadcastable
==
node
.
outputs
[
0
]
.
type
.
broadcastable
for
i
in
node
.
inputs
]):
return
False
def
dimshuffled_alloc
(
i
):
return
(
isinstance
(
i
.
owner
.
op
,
DimShuffleOP
)
and
i
.
owner
.
inputs
[
0
]
.
owner
and
isinstance
(
i
.
owner
.
inputs
[
0
]
.
owner
.
op
,
AllocOP
))
# At least one input must have an owner that is either a AllocOP or a
# DimShuffleOP with an owner that is a AllocOP -- otherwise there is
# nothing to optimize.
if
not
any
([
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
AllocOP
)
or
dimshuffled_alloc
(
i
))
for
i
in
node
.
inputs
]):
return
False
# Search for input that we can use as a baseline for the dimensions.
assert_op_idx
=
-
1
for
idx
,
i
in
enumerate
(
node
.
inputs
):
if
i
.
type
.
broadcastable
==
node
.
outputs
[
0
]
.
type
.
broadcastable
:
# Prefer an input that is not a AllocOP nor a DimShuffleOP of a
# AllocOP so that all allocs can be optimized.
if
not
(
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
AllocOP
)
or
dimshuffled_alloc
(
i
))):
assert_op_idx
=
idx
break
# It may be the case that only AllocOP and DimShuffleOP of AllocOP exist.
if
assert_op_idx
<
0
:
# We want to optimize as many allocs as possible. When
# there is more than one then do all but one. number of
# inputs with alloc or dimshuffle alloc
l2
=
[
i
for
i
in
node
.
inputs
if
(
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
AllocOP
)
or
dimshuffled_alloc
(
i
)))]
# If only 1 alloc or dimshuffle alloc, it is the one we
# will use for the shape. So no alloc would be removed.
if
len
(
l2
)
>
1
:
# l containt inputs with alloc or dimshuffle alloc
# only. Its length will always be at least one, as we
# checked that before
l
=
[
idx
for
idx
,
i
in
enumerate
(
node
.
inputs
)
if
i
.
broadcastable
==
node
.
outputs
[
0
]
.
broadcastable
]
assert_op_idx
=
l
[
0
]
# The first one is as good as any to use.
else
:
# Nothing would be optimized!
return
False
assert_op
=
node
.
inputs
[
assert_op_idx
]
cmp_op
=
assert_op
new_i
=
[]
same_shape
=
node
.
fgraph
.
shape_feature
.
same_shape
for
i
in
node
.
inputs
:
# Remove alloc
if
(
i
.
owner
and
isinstance
(
i
.
owner
.
op
,
AllocOP
)
and
i
.
owner
.
inputs
[
0
]
.
type
!=
i
.
owner
.
outputs
[
0
]
.
type
):
# when i.owner.inputs[0].type == i.owner.outputs[0].type we
# will remove that alloc later
assert
i
.
type
.
ndim
==
cmp_op
.
ndim
if
(
theano
.
config
.
experimental
.
local_alloc_elemwise_assert
and
not
same_shape
(
i
,
cmp_op
)):
assert_op
=
assert_
(
assert_op
,
*
[
T
.
eq
(
i
.
shape
[
idx
],
cmp_op
.
shape
[
idx
])
for
idx
in
xrange
(
i
.
type
.
ndim
)
if
not
i
.
type
.
broadcastable
[
idx
]])
new_i
.
append
(
i
.
owner
.
inputs
[
0
])
# Remove Alloc in DimShuffle
elif
i
.
owner
and
dimshuffled_alloc
(
i
):
assert
i
.
type
.
ndim
==
cmp_op
.
type
.
ndim
if
theano
.
config
.
experimental
.
local_alloc_elemwise_assert
:
assert_cond
=
[
T
.
eq
(
i
.
shape
[
idx
],
cmp_op
.
shape
[
idx
])
for
idx
in
xrange
(
i
.
type
.
ndim
)
if
not
i
.
type
.
broadcastable
[
idx
]
and
not
same_shape
(
i
,
cmp_op
,
idx
,
idx
)]
if
assert_cond
:
assert_op
=
assert_
(
assert_op
,
*
assert_cond
)
alloc_input
=
i
.
owner
.
inputs
[
0
]
.
owner
.
inputs
[
0
]
if
alloc_input
.
ndim
!=
i
.
owner
.
inputs
[
0
]
.
ndim
:
# The alloc can add dimension to the value
# We add a dimshuffle to add them.
# We let later optimization merge the multiple dimshuffle
nb_dim_to_add
=
i
.
owner
.
inputs
[
0
]
.
ndim
-
alloc_input
.
ndim
alloc_input
=
alloc_input
.
dimshuffle
(
[
'x'
]
*
nb_dim_to_add
+
list
(
range
(
alloc_input
.
ndim
)))
# We need to keep the dimshuffle. It could swap axes or
# add dimensions anywhere.
r_i
=
i
.
owner
.
op
(
alloc_input
)
# Copy stack trace from i to new_i
copy_stack_trace
(
i
,
r_i
)
new_i
.
append
(
r_i
)
else
:
new_i
.
append
(
i
)
new_i
[
assert_op_idx
]
=
assert_op
ret
=
node
.
op
(
*
new_i
,
return_list
=
True
)
# Copy over stack trace from previous outputs to new outputs.
copy_stack_trace
(
node
.
outputs
,
ret
)
return
ret
return
local_elemwise_alloc
# TODO, global optimizer that lift the assert to the beginning of the graph.
# TODO, optimize all inputs when possible -- currently when all inputs have
# an alloc all but one is optimized.
local_elemwise_alloc
=
register_specialize
(
gof
.
local_optimizer
([
T
.
Elemwise
])(
local_elemwise_alloc_op
(
T
.
Elemwise
,
T
.
Alloc
,
T
.
DimShuffle
)),
'local_alloc_elemwise'
)
theano
.
configparser
.
AddConfigVar
(
'experimental.local_alloc_elemwise'
,
"DEPRECATED: If True, enable the experimental"
" optimization local_alloc_elemwise."
" Generates error if not True. Use"
" optimizer_excluding=local_alloc_elemwise"
" to dsiable."
,
theano
.
configparser
.
BoolParam
(
True
,
is_valid
=
lambda
x
:
x
),
in_c_key
=
False
)
# False could make the graph faster but not as safe.
theano
.
configparser
.
AddConfigVar
(
'experimental.local_alloc_elemwise_assert'
,
"When the local_alloc_elemwise is applied, add"
" an assert to highlight shape errors."
,
theano
.
configparser
.
BoolParam
(
True
),
in_c_key
=
False
)
@gof.local_optimizer
([
T
.
Elemwise
])
@gof.local_optimizer
([
T
.
Elemwise
])
def
local_fill_sink
(
node
):
def
local_fill_sink
(
node
):
"""
"""
...
@@ -1443,7 +1605,6 @@ def local_fill_sink(node):
...
@@ -1443,7 +1605,6 @@ def local_fill_sink(node):
# The newly created node c doesn't has 'clients',
# The newly created node c doesn't has 'clients',
# so this iteration is took place with node.outputs[0]
# so this iteration is took place with node.outputs[0]
replacements
=
{
node
.
outputs
[
0
]:
c
}
replacements
=
{
node
.
outputs
[
0
]:
c
}
all_clients_replaced
=
True
for
client
,
cl_idx
in
node
.
outputs
[
0
]
.
clients
:
for
client
,
cl_idx
in
node
.
outputs
[
0
]
.
clients
:
if
(
hasattr
(
client
,
'op'
)
and
if
(
hasattr
(
client
,
'op'
)
and
isinstance
(
client
.
op
,
T
.
Elemwise
)
and
isinstance
(
client
.
op
,
T
.
Elemwise
)
and
...
@@ -1456,13 +1617,8 @@ def local_fill_sink(node):
...
@@ -1456,13 +1617,8 @@ def local_fill_sink(node):
new_client
.
owner
.
outputs
[
0
]
.
clients
=
client
.
outputs
[
0
]
.
clients
new_client
.
owner
.
outputs
[
0
]
.
clients
=
client
.
outputs
[
0
]
.
clients
r
=
local_fill_sink
.
transform
(
new_client
.
owner
)
r
=
local_fill_sink
.
transform
(
new_client
.
owner
)
if
not
r
:
if
not
r
:
all_clients_replaced
=
False
continue
continue
replacements
.
update
(
r
)
replacements
.
update
(
r
)
else
:
all_clients_replaced
=
False
if
all_clients_replaced
:
replacements
.
pop
(
node
.
outputs
[
0
],
None
)
return
replacements
return
replacements
register_canonicalize
(
local_fill_sink
)
register_canonicalize
(
local_fill_sink
)
...
@@ -1470,7 +1626,7 @@ register_canonicalize(local_fill_sink)
...
@@ -1470,7 +1626,7 @@ register_canonicalize(local_fill_sink)
@register_specialize
@register_specialize
@register_stabilize
@register_stabilize
@register_canonicalize
# @register_canonicalize # We make full pass after the canonizer phase.
@gof.local_optimizer
([
T
.
fill
])
@gof.local_optimizer
([
T
.
fill
])
def
local_fill_to_alloc
(
node
):
def
local_fill_to_alloc
(
node
):
"""fill(s,v) -> alloc(v, shape(s))
"""fill(s,v) -> alloc(v, shape(s))
...
@@ -1510,7 +1666,18 @@ def local_fill_to_alloc(node):
...
@@ -1510,7 +1666,18 @@ def local_fill_to_alloc(node):
node
,)
# theano.printing.debugprint(node.outputs[0], file='str'))
node
,)
# theano.printing.debugprint(node.outputs[0], file='str'))
return
rval
return
rval
# Register this after stabilize at 1.5 to make sure stabilize don't
# get affected by less canonicalized graph due to alloc.
compile
.
optdb
.
register
(
'local_fill_to_alloc'
,
in2out
(
local_fill_to_alloc
),
1.51
,
'fast_run'
)
# Needed to clean some extra alloc added by local_fill_to_alloc
compile
.
optdb
.
register
(
'local_elemwise_alloc'
,
in2out
(
local_elemwise_alloc
),
1.52
,
'fast_run'
)
@register_canonicalize
(
"fast_compile"
)
@gof.local_optimizer
([
T
.
fill
])
@gof.local_optimizer
([
T
.
fill
])
def
local_useless_fill
(
node
):
def
local_useless_fill
(
node
):
"""fill(s,v) -> v
"""fill(s,v) -> v
...
@@ -1526,9 +1693,6 @@ def local_useless_fill(node):
...
@@ -1526,9 +1693,6 @@ def local_useless_fill(node):
# this is a useless fill, erase it.
# this is a useless fill, erase it.
# also, we don't need to copy over any stack traces here
# also, we don't need to copy over any stack traces here
return
[
v
]
return
[
v
]
compile
.
optdb
[
'canonicalize'
]
.
register
(
'local_useless_fill'
,
in2out
(
local_useless_fill
),
1.1
,
'fast_compile'
)
@register_specialize
@register_specialize
...
@@ -2009,172 +2173,6 @@ compile.optdb['specialize'].register('local_remove_all_assert',
...
@@ -2009,172 +2173,6 @@ compile.optdb['specialize'].register('local_remove_all_assert',
'unsafe'
,
'unsafe'
,
use_db_name_as_tag
=
False
)
use_db_name_as_tag
=
False
)
def
local_elemwise_alloc_op
(
ElemwiseOP
,
AllocOP
,
DimShuffleOP
):
def
local_elemwise_alloc
(
node
):
"""
elemwise(alloc(x, shp), ..., y.TensorType(BROADCAST CONDITION))
-> elemwise(x, y.TensorType(BROADCAST CONDITION))
elemwise(dimshuffle(alloc(x, shp)),... ,y.TensorType(BROADCAST CONDITION))
-> elemwise(x.dimshuffle(...), y.TensorType(BROADCAST CONDITION))
BROADCAST CONDITION: the condition is that the one input that are
not to be optimized to have the same broadcast pattern as the
output.
We can change the alloc by a dimshuffle as the elemwise
already have the shape info. The dimshuffle will be faster
to exec.
"""
if
not
isinstance
(
node
.
op
,
ElemwiseOP
):
return
False
if
len
(
node
.
outputs
)
>
1
:
# Ensure all outputs have the same broadcast pattern
# This is a supposition that I'm not sure is always true.
assert
all
([
o
.
type
.
broadcastable
==
node
.
outputs
[
0
]
.
type
.
broadcastable
for
o
in
node
.
outputs
[
1
:]])
# The broadcast pattern of the ouptut must match the broadcast
# pattern of at least one of the inputs.
if
not
any
([
i
.
type
.
broadcastable
==
node
.
outputs
[
0
]
.
type
.
broadcastable
for
i
in
node
.
inputs
]):
return
False
def
dimshuffled_alloc
(
i
):
return
(
isinstance
(
i
.
owner
.
op
,
DimShuffleOP
)
and
i
.
owner
.
inputs
[
0
]
.
owner
and
isinstance
(
i
.
owner
.
inputs
[
0
]
.
owner
.
op
,
AllocOP
))
# At least one input must have an owner that is either a AllocOP or a
# DimShuffleOP with an owner that is a AllocOP -- otherwise there is
# nothing to optimize.
if
not
any
([
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
AllocOP
)
or
dimshuffled_alloc
(
i
))
for
i
in
node
.
inputs
]):
return
False
# Search for input that we can use as a baseline for the dimensions.
assert_op_idx
=
-
1
for
idx
,
i
in
enumerate
(
node
.
inputs
):
if
i
.
type
.
broadcastable
==
node
.
outputs
[
0
]
.
type
.
broadcastable
:
# Prefer an input that is not a AllocOP nor a DimShuffleOP of a
# AllocOP so that all allocs can be optimized.
if
not
(
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
AllocOP
)
or
dimshuffled_alloc
(
i
))):
assert_op_idx
=
idx
break
# It may be the case that only AllocOP and DimShuffleOP of AllocOP exist.
if
assert_op_idx
<
0
:
# We want to optimize as many allocs as possible. When
# there is more than one then do all but one. number of
# inputs with alloc or dimshuffle alloc
l2
=
[
i
for
i
in
node
.
inputs
if
(
i
.
owner
and
(
isinstance
(
i
.
owner
.
op
,
AllocOP
)
or
dimshuffled_alloc
(
i
)))]
# If only 1 alloc or dimshuffle alloc, it is the one we
# will use for the shape. So no alloc would be removed.
if
len
(
l2
)
>
1
:
# l containt inputs with alloc or dimshuffle alloc
# only. Its length will always be at least one, as we
# checked that before
l
=
[
idx
for
idx
,
i
in
enumerate
(
node
.
inputs
)
if
i
.
broadcastable
==
node
.
outputs
[
0
]
.
broadcastable
]
assert_op_idx
=
l
[
0
]
# The first one is as good as any to use.
else
:
# Nothing would be optimized!
return
False
assert_op
=
node
.
inputs
[
assert_op_idx
]
cmp_op
=
assert_op
new_i
=
[]
same_shape
=
node
.
fgraph
.
shape_feature
.
same_shape
for
i
in
node
.
inputs
:
# Remove alloc
if
(
i
.
owner
and
isinstance
(
i
.
owner
.
op
,
AllocOP
)
and
i
.
owner
.
inputs
[
0
]
.
type
!=
i
.
owner
.
outputs
[
0
]
.
type
):
# when i.owner.inputs[0].type == i.owner.outputs[0].type we
# will remove that alloc later
assert
i
.
type
.
ndim
==
cmp_op
.
ndim
if
(
theano
.
config
.
experimental
.
local_alloc_elemwise_assert
and
not
same_shape
(
i
,
cmp_op
)):
assert_op
=
assert_
(
assert_op
,
*
[
T
.
eq
(
i
.
shape
[
idx
],
cmp_op
.
shape
[
idx
])
for
idx
in
xrange
(
i
.
type
.
ndim
)
if
not
i
.
type
.
broadcastable
[
idx
]])
new_i
.
append
(
i
.
owner
.
inputs
[
0
])
# Remove Alloc in DimShuffle
elif
i
.
owner
and
dimshuffled_alloc
(
i
):
assert
i
.
type
.
ndim
==
cmp_op
.
type
.
ndim
if
theano
.
config
.
experimental
.
local_alloc_elemwise_assert
:
assert_cond
=
[
T
.
eq
(
i
.
shape
[
idx
],
cmp_op
.
shape
[
idx
])
for
idx
in
xrange
(
i
.
type
.
ndim
)
if
not
i
.
type
.
broadcastable
[
idx
]
and
not
same_shape
(
i
,
cmp_op
,
idx
,
idx
)]
if
assert_cond
:
assert_op
=
assert_
(
assert_op
,
*
assert_cond
)
alloc_input
=
i
.
owner
.
inputs
[
0
]
.
owner
.
inputs
[
0
]
if
alloc_input
.
ndim
!=
i
.
owner
.
inputs
[
0
]
.
ndim
:
# The alloc can add dimension to the value
# We add a dimshuffle to add them.
# We let later optimization merge the multiple dimshuffle
nb_dim_to_add
=
i
.
owner
.
inputs
[
0
]
.
ndim
-
alloc_input
.
ndim
alloc_input
=
alloc_input
.
dimshuffle
(
[
'x'
]
*
nb_dim_to_add
+
list
(
range
(
alloc_input
.
ndim
)))
# We need to keep the dimshuffle. It could swap axes or
# add dimensions anywhere.
r_i
=
i
.
owner
.
op
(
alloc_input
)
# Copy stack trace from i to new_i
copy_stack_trace
(
i
,
r_i
)
new_i
.
append
(
r_i
)
else
:
new_i
.
append
(
i
)
new_i
[
assert_op_idx
]
=
assert_op
ret
=
node
.
op
(
*
new_i
,
return_list
=
True
)
# Copy over stack trace from previous outputs to new outputs.
copy_stack_trace
(
node
.
outputs
,
ret
)
return
ret
return
local_elemwise_alloc
# TODO, global optimizer that lift the assert to the beginning of the graph.
# TODO, optimize all inputs when possible -- currently when all inputs have
# an alloc all but one is optimized.
local_elemwise_alloc
=
register_specialize
(
gof
.
local_optimizer
([
T
.
Elemwise
])(
local_elemwise_alloc_op
(
T
.
Elemwise
,
T
.
Alloc
,
T
.
DimShuffle
)),
'local_alloc_elemwise'
)
theano
.
configparser
.
AddConfigVar
(
'experimental.local_alloc_elemwise'
,
"DEPRECATED: If True, enable the experimental"
" optimization local_alloc_elemwise."
" Generates error if not True. Use"
" optimizer_excluding=local_alloc_elemwise"
" to dsiable."
,
theano
.
configparser
.
BoolParam
(
True
,
is_valid
=
lambda
x
:
x
),
in_c_key
=
False
)
# False could make the graph faster but not as safe.
theano
.
configparser
.
AddConfigVar
(
'experimental.local_alloc_elemwise_assert'
,
"When the local_alloc_elemwise is applied, add"
" an assert to highlight shape errors."
,
theano
.
configparser
.
BoolParam
(
True
),
in_c_key
=
False
)
#######################
#######################
# Constant Canonicalization
# Constant Canonicalization
############################
############################
...
@@ -4018,7 +4016,9 @@ class Canonizer(gof.LocalOptimizer):
...
@@ -4018,7 +4016,9 @@ class Canonizer(gof.LocalOptimizer):
"""
"""
if
isinstance
(
v
,
Variable
):
if
isinstance
(
v
,
Variable
):
try
:
try
:
return
get_scalar_constant_value
(
v
)
# As the constant folding is in the canonicalize phase,
# We don't need to check all the graph each time.
return
get_scalar_constant_value
(
v
,
only_process_constants
=
True
)
except
NotScalarConstantError
:
except
NotScalarConstantError
:
return
None
return
None
else
:
else
:
...
@@ -5467,9 +5467,6 @@ def local_greedy_distributor(node):
...
@@ -5467,9 +5467,6 @@ def local_greedy_distributor(node):
return
[
rval
]
return
[
rval
]
@register_canonicalize
(
'fast_compile'
)
@register_stabilize
(
'fast_compile'
)
@register_specialize
(
'fast_compile'
)
@gof.local_optimizer
(
None
)
@gof.local_optimizer
(
None
)
def
constant_folding
(
node
):
def
constant_folding
(
node
):
for
input
in
node
.
inputs
:
for
input
in
node
.
inputs
:
...
@@ -5519,6 +5516,13 @@ def constant_folding(node):
...
@@ -5519,6 +5516,13 @@ def constant_folding(node):
return
rval
return
rval
topo_constant_folding
=
in2out
(
constant_folding
,
ignore_newtrees
=
True
,
name
=
"topo_constant_folding"
)
register_canonicalize
(
topo_constant_folding
,
'fast_compile'
,
final_opt
=
True
)
register_stabilize
(
topo_constant_folding
,
'fast_compile'
,
final_opt
=
True
)
register_specialize
(
topo_constant_folding
,
'fast_compile'
,
final_opt
=
True
)
def
_is_1
(
expr
):
def
_is_1
(
expr
):
"""
"""
...
@@ -5758,7 +5762,7 @@ def local_log_erfc(node):
...
@@ -5758,7 +5762,7 @@ def local_log_erfc(node):
# sqrt(pi)*-x/(1-1/(2*x**2)+3/(4*x**4)-15/(8*x**6)))
# sqrt(pi)*-x/(1-1/(2*x**2)+3/(4*x**4)-15/(8*x**6)))
# for float64: threshold=26.63 see at the end of the fct for the explaination
# for float64: threshold=26.63 see at the end of the fct for the explaination
# for float32: threshold=9.3 see at the end of the fct for the explaination
# for float32: threshold=9.3 see at the end of the fct for the explaination
# TODO: remove the contraint that there are only 2 inputs to
mul and
exp(x**2)
# TODO: remove the contraint that there are only 2 inputs to exp(x**2)
# is the second.
# is the second.
# TODO: at the test point 10 in float32, there is instability in the original
# TODO: at the test point 10 in float32, there is instability in the original
# value. The original gives -30.0, the stab -20.1 and in float64 -18.1.
# value. The original gives -30.0, the stab -20.1 and in float64 -18.1.
...
@@ -5779,20 +5783,23 @@ def local_grad_log_erfc_neg(node):
...
@@ -5779,20 +5783,23 @@ def local_grad_log_erfc_neg(node):
# The mul is optional.
# The mul is optional.
if
node
.
inputs
[
0
]
.
owner
.
op
!=
T
.
mul
:
if
node
.
inputs
[
0
]
.
owner
.
op
!=
T
.
mul
:
mul
=
None
mul
=
None
y
=
1
y
=
[]
if
not
node
.
inputs
[
0
]
.
owner
or
node
.
inputs
[
0
]
.
owner
.
op
!=
T
.
exp
:
if
not
node
.
inputs
[
0
]
.
owner
or
node
.
inputs
[
0
]
.
owner
.
op
!=
T
.
exp
:
return
False
return
False
exp
=
node
.
inputs
[
0
]
exp
=
node
.
inputs
[
0
]
else
:
else
:
mul
=
node
.
inputs
[
0
]
mul
=
node
.
inputs
[
0
]
if
mul
.
owner
.
inputs
[
0
]
.
owner
or
len
(
mul
.
owner
.
inputs
)
!=
2
:
exp
=
None
return
False
for
idx
,
inp
in
enumerate
(
mul
.
owner
.
inputs
):
y
=
mul
.
owner
.
inputs
[
0
]
if
inp
.
owner
and
inp
.
owner
.
op
==
T
.
exp
:
if
(
not
mul
.
owner
.
inputs
[
1
]
.
owner
or
exp
=
inp
mul
.
owner
.
inputs
[
1
]
.
owner
.
op
!=
T
.
exp
):
break
return
False
if
len
(
mul
.
owner
.
inputs
)
==
2
:
exp
=
mul
.
owner
.
inputs
[
1
]
y
=
[
mul
.
owner
.
inputs
[
1
-
idx
]]
else
:
y
=
mul
.
owner
.
inputs
[:]
del
y
[
idx
]
del
mul
if
not
exp
.
owner
.
inputs
[
0
]
.
owner
:
if
not
exp
.
owner
.
inputs
[
0
]
.
owner
:
return
False
return
False
...
@@ -5894,9 +5901,10 @@ def local_grad_log_erfc_neg(node):
...
@@ -5894,9 +5901,10 @@ def local_grad_log_erfc_neg(node):
# threshold = 10.1
# threshold = 10.1
elif
x
.
dtype
==
'float64'
:
elif
x
.
dtype
==
'float64'
:
threshold
=
26.641747557
threshold
=
26.641747557
ret
=
T
.
switch
(
x
<
threshold
,
true_div_no_mul
,
stab_value
)
*
y
ret
=
T
.
switch
(
x
<
threshold
,
true_div_no_mul
,
stab_value
)
if
y
:
ret
=
T
.
mul
(
ret
,
*
y
)
ret
.
values_eq_approx
=
values_eq_approx_remove_inf_nan
ret
.
values_eq_approx
=
values_eq_approx_remove_inf_nan
return
[
ret
]
return
[
ret
]
"""
"""
The libm used for the test is amdlibm
The libm used for the test is amdlibm
...
...
theano/tensor/signal/downsample.py
浏览文件 @
f0bd940e
...
@@ -256,7 +256,10 @@ class DownsampleFactorMax(Op):
...
@@ -256,7 +256,10 @@ class DownsampleFactorMax(Op):
raise
TypeError
()
raise
TypeError
()
# TODO: consider restricting the dtype?
# TODO: consider restricting the dtype?
x
=
tensor
.
as_tensor_variable
(
x
)
x
=
tensor
.
as_tensor_variable
(
x
)
return
gof
.
Apply
(
self
,
[
x
],
[
x
.
type
()])
# If the input shape are broadcastable we can have 0 in the output shape
broad
=
x
.
broadcastable
[:
2
]
+
(
False
,
False
)
out
=
tensor
.
TensorType
(
x
.
dtype
,
broad
)
return
gof
.
Apply
(
self
,
[
x
],
[
out
()])
def
perform
(
self
,
node
,
inp
,
out
):
def
perform
(
self
,
node
,
inp
,
out
):
x
,
=
inp
x
,
=
inp
...
...
theano/tensor/signal/tests/test_downsample.py
浏览文件 @
f0bd940e
...
@@ -801,6 +801,16 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
...
@@ -801,6 +801,16 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
[
image_val
,
maxout_val
,
gz_val
],
[
image_val
,
maxout_val
,
gz_val
],
MaxPoolGrad
,
MaxPoolGrad
,
warn
=
False
)
warn
=
False
)
# checking with broadcastable input
image
=
tensor
.
tensor
(
dtype
=
'float64'
,
broadcastable
=
(
False
,
False
,
True
,
True
))
image_val
=
rng
.
rand
(
4
,
6
,
1
,
1
)
self
.
_compile_and_check
(
[
image
],
[
DownsampleFactorMax
((
2
,
2
),
ignore_border
=
True
,
padding
=
(
0
,
0
))(
image
)],
[
image_val
],
DownsampleFactorMax
)
def
test_opt_max_to_average
(
self
):
def
test_opt_max_to_average
(
self
):
im
=
theano
.
tensor
.
tensor4
()
im
=
theano
.
tensor
.
tensor4
()
...
...
theano/tensor/tests/test_opt.py
浏览文件 @
f0bd940e
...
@@ -481,7 +481,7 @@ class test_canonize(unittest.TestCase):
...
@@ -481,7 +481,7 @@ class test_canonize(unittest.TestCase):
mode
=
compile
.
mode
.
get_default_mode
()
mode
=
compile
.
mode
.
get_default_mode
()
opt
=
gof
.
Query
([
"canonicalize"
])
opt
=
gof
.
Query
([
"canonicalize"
])
opt
=
opt
.
including
(
'ShapeOpt'
)
opt
=
opt
.
including
(
'ShapeOpt'
,
'local_fill_to_alloc'
)
opt
=
opt
.
excluding
(
opt
=
opt
.
excluding
(
'local_elemwise_fusion'
)
'local_elemwise_fusion'
)
mode
=
mode
.
__class__
(
linker
=
mode
.
linker
,
optimizer
=
opt
)
mode
=
mode
.
__class__
(
linker
=
mode
.
linker
,
optimizer
=
opt
)
...
@@ -4021,7 +4021,8 @@ class T_Rebroadcast(unittest.TestCase):
...
@@ -4021,7 +4021,8 @@ class T_Rebroadcast(unittest.TestCase):
class
T_useless_elemwise
(
unittest
.
TestCase
):
class
T_useless_elemwise
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
mode
=
theano
.
compile
.
get_default_mode
()
.
including
(
'canonicalize'
)
self
.
mode
=
theano
.
compile
.
get_default_mode
()
.
including
(
'canonicalize'
,
'local_fill_to_alloc'
)
def
test_eq
(
self
):
def
test_eq
(
self
):
x
=
T
.
dmatrix
()
x
=
T
.
dmatrix
()
...
@@ -4545,7 +4546,7 @@ class T_local_erfc(unittest.TestCase):
...
@@ -4545,7 +4546,7 @@ class T_local_erfc(unittest.TestCase):
# test that we work without the mul
# test that we work without the mul
f
=
theano
.
function
([
x
],
T
.
exp
(
T
.
neg
(
T
.
sqr
(
x
)))
/
T
.
erfc
(
x
),
mode
=
mode
)
f
=
theano
.
function
([
x
],
T
.
exp
(
T
.
neg
(
T
.
sqr
(
x
)))
/
T
.
erfc
(
x
),
mode
=
mode
)
assert
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
==
2
3
,
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
assert
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
==
2
2
,
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
assert
f
.
maker
.
fgraph
.
outputs
[
0
]
.
dtype
==
theano
.
config
.
floatX
assert
f
.
maker
.
fgraph
.
outputs
[
0
]
.
dtype
==
theano
.
config
.
floatX
assert
all
(
numpy
.
isfinite
(
f
(
val
)))
assert
all
(
numpy
.
isfinite
(
f
(
val
)))
...
@@ -4558,7 +4559,7 @@ class T_local_erfc(unittest.TestCase):
...
@@ -4558,7 +4559,7 @@ class T_local_erfc(unittest.TestCase):
# test that we work without the sqr and neg
# test that we work without the sqr and neg
f
=
theano
.
function
([
x
],
T
.
exp
(
T
.
mul
(
-
1
,
x
,
x
))
/
T
.
erfc
(
x
),
mode
=
mode
)
f
=
theano
.
function
([
x
],
T
.
exp
(
T
.
mul
(
-
1
,
x
,
x
))
/
T
.
erfc
(
x
),
mode
=
mode
)
assert
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
==
2
2
,
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
assert
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
==
2
1
,
len
(
f
.
maker
.
fgraph
.
apply_nodes
)
assert
f
.
maker
.
fgraph
.
outputs
[
0
]
.
dtype
==
theano
.
config
.
floatX
assert
f
.
maker
.
fgraph
.
outputs
[
0
]
.
dtype
==
theano
.
config
.
floatX
assert
all
(
numpy
.
isfinite
(
f
(
val
)))
assert
all
(
numpy
.
isfinite
(
f
(
val
)))
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论