Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
bb096349
提交
bb096349
authored
6月 15, 2012
作者:
lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #666 from nouiz/time_opt
Time opt
上级
b8165faa
0c0c9f91
隐藏空白字符变更
内嵌
并排
正在显示
11 个修改的文件
包含
315 行增加
和
21 行删除
+315
-21
function_module.py
theano/compile/function_module.py
+13
-4
mode.py
theano/compile/mode.py
+4
-0
profiling.py
theano/compile/profiling.py
+33
-3
env.py
theano/gof/env.py
+1
-0
graph.py
theano/gof/graph.py
+1
-1
opt.py
theano/gof/opt.py
+235
-5
optdb.py
theano/gof/optdb.py
+4
-1
toolbox.py
theano/gof/toolbox.py
+15
-4
vm.py
theano/gof/vm.py
+3
-0
test_driver.py
theano/sandbox/cuda/tests/test_driver.py
+4
-2
scan.py
theano/scan_module/scan.py
+2
-1
没有找到文件。
theano/compile/function_module.py
浏览文件 @
bb096349
...
...
@@ -994,11 +994,16 @@ class FunctionMaker(object):
# we allow ProfileMode to provide a ProfileStats object
# using this somewhat awkward mechanism.
mode_profile
=
getattr
(
mode
,
'profile'
,
None
)
if
(
profile
is
not
None
)
and
(
mode_profile
is
not
None
):
if
(
profile
is
not
None
and
profile
is
not
False
and
mode_profile
is
not
None
):
raise
TypeError
(
'profile passed via both "mode" and "profile" arguments'
)
self
.
profile
=
profile
=
profile
or
mode_profile
if
profile
:
# We preload the cache here to don't have its timming
# included in optimization that compile function.
theano
.
gof
.
cc
.
get_module_cache
()
# Handle the case where inputs and/or outputs is a single Variable (not in a list)
self
.
orig_outputs
=
outputs
unpack_single
=
False
...
...
@@ -1030,6 +1035,8 @@ class FunctionMaker(object):
# make the env (copies the graph, creates NEW INPUT AND OUTPUT VARIABLES)
env
,
additional_outputs
=
std_env
(
expanded_inputs
,
outputs
,
accept_inplace
)
env
.
profile
=
profile
self
.
env
=
env
# Fetch the optimizer and linker
...
...
@@ -1042,13 +1049,15 @@ class FunctionMaker(object):
theano
.
config
.
compute_test_value
=
"off"
gof
.
Op
.
add_stack_trace_on_call
=
False
start_optimizer
=
time
.
time
()
optimizer
(
env
)
optimizer
_profile
=
optimizer
(
env
)
end_optimizer
=
time
.
time
()
opt_time
=
end_optimizer
-
start_optimizer
mode
.
optimizer_time
+=
opt_time
if
profile
:
profile
.
optimizer_time
+=
opt_time
if
theano
.
config
.
profile_optimizer
:
profile
.
optimizer_profile
=
(
optimizer
,
optimizer_profile
)
_logger
.
debug
(
'Optimizing took
%
f seconds'
,
opt_time
)
#Add deep copy to respect the memory interface
...
...
theano/compile/mode.py
浏览文件 @
bb096349
...
...
@@ -88,6 +88,10 @@ OPT_FAST_RUN_STABLE = OPT_FAST_RUN.requiring('stable')
OPT_FAST_COMPILE
=
gof
.
Query
(
include
=
[
'fast_compile'
])
OPT_STABILIZE
=
gof
.
Query
(
include
=
[
'fast_run'
])
OPT_STABILIZE
.
position_cutoff
=
1.5000001
OPT_FAST_RUN
.
name
=
'OPT_FAST_RUN'
OPT_FAST_RUN_STABLE
.
name
=
'OPT_FAST_RUN_STABLE'
OPT_FAST_COMPILE
.
name
=
'OPT_FAST_COMPILE'
OPT_STABILIZE
.
name
=
'OPT_STABILIZE'
predefined_optimizers
=
{
None
:
(
lambda
env
:
None
),
...
...
theano/compile/profiling.py
浏览文件 @
bb096349
...
...
@@ -38,12 +38,14 @@ AddConfigVar('profiling.time_thunks',
def
_atexit_print_fn
():
"""Print ProfileStat objects in _atexit_print_list to _atexit_print_file
"""
printed
=
0
for
ps
in
_atexit_print_list
:
if
ps
.
fct_callcount
or
ps
.
compile_time
>
0
:
ps
.
summary
(
file
=
_atexit_print_file
)
printed
+=
1
else
:
print
'Skipping empty Profile'
if
len
(
_atexit_print_list
)
>
1
:
if
printed
>
1
:
# Make a global profile
cum
=
copy
.
copy
(
_atexit_print_list
[
0
])
cum
.
message
=
"Sum of all printed profiles at exit"
...
...
@@ -51,14 +53,26 @@ def _atexit_print_fn():
# for ps in [ps for ps in _atexit_print_list[1:]
# if not isinstance(ps, ScanProfileStats)]:
for
attr
in
[
"compile_time"
,
"fct_call_time"
,
"fct_callcount"
,
"vm_call_time"
,
"optimizer_time"
,
"linker_time"
]:
"vm_call_time"
,
"optimizer_time"
,
"linker_time"
,
"validate_time"
]:
setattr
(
cum
,
attr
,
getattr
(
cum
,
attr
)
+
getattr
(
ps
,
attr
))
#merge dictonary
for
attr
in
[
"apply_time"
,
"apply_callcount"
,
"apply_cimpl"
,
"outputs_size"
]:
cum_attr
=
getattr
(
cum
,
attr
)
for
key
,
val
in
getattr
(
ps
,
attr
)
.
iteritems
():
assert
key
not
in
cum_attr
cum_attr
[
key
]
=
val
if
cum
.
optimizer_profile
and
ps
.
optimizer_profile
:
merge
=
cum
.
optimizer_profile
[
0
]
.
merge_profile
(
cum
.
optimizer_profile
[
1
],
ps
.
optimizer_profile
[
1
])
cum
.
optimizer_profile
=
(
cum
.
optimizer_profile
[
0
],
merge
)
else
:
cum
.
optimizer_profile
=
None
cum
.
summary
(
file
=
_atexit_print_file
)
...
...
@@ -118,11 +132,19 @@ class ProfileStats(object):
optimizer_time
=
0.0
# time spent optimizing graph (FunctionMaker.__init__)
validate_time
=
0.0
# time spent in env.validate
# This is a subset of optimizer_time that is dominated by toposort()
# when the destorymap feature is included.
linker_time
=
0.0
# time spent linking graph (FunctionMaker.create)
line_width
=
140
optimizer_profile
=
None
# None or tuple (the optimizer, the profile it returned)
# param is called flag_time_thunks because most other attributes with time
# in the name are times *of* something, rather than configuration flags.
def
__init__
(
self
,
atexit_print
=
True
,
flag_time_thunks
=
None
,
**
kwargs
):
...
...
@@ -390,11 +412,15 @@ class ProfileStats(object):
local_time
,
100
*
local_time
/
self
.
fct_call_time
)
print
>>
file
,
' Total compile time:
%
es'
%
self
.
compile_time
print
>>
file
,
' Theano Optimizer time:
%
es'
%
self
.
optimizer_time
print
>>
file
,
' Theano validate time:
%
es'
%
self
.
validate_time
print
>>
file
,
(
' Theano Linker time (includes C,'
' CUDA code generation/compiling):
%
es'
%
self
.
linker_time
)
print
>>
file
,
''
# The validation time is a subset of optimizer_time
assert
self
.
validate_time
<
self
.
optimizer_time
def
summary
(
self
,
file
=
sys
.
stderr
,
n_ops_to_print
=
20
,
n_applies_to_print
=
20
):
self
.
summary_function
(
file
)
...
...
@@ -402,9 +428,13 @@ class ProfileStats(object):
if
local_time
>
0
:
self
.
summary_ops
(
file
,
n_ops_to_print
)
self
.
summary_nodes
(
file
,
n_applies_to_print
)
el
se
:
el
if
self
.
fct_callcount
>
0
:
print
>>
file
,
(
" No node time accumulated "
"(hint: try config profiling.time_thunks=1)"
)
if
self
.
optimizer_profile
:
print
"Optimizer Profile"
print
"-----------------"
self
.
optimizer_profile
[
0
]
.
print_profile
(
file
,
self
.
optimizer_profile
[
1
])
if
0
:
# old code still to be ported from ProfileMode
...
...
theano/gof/env.py
浏览文件 @
bb096349
...
...
@@ -129,6 +129,7 @@ class Env(utils.object2):
self
.
node_locks
=
{}
self
.
variable_locks
=
{}
self
.
profile
=
None
### Setup a Variable ###
...
...
theano/gof/graph.py
浏览文件 @
bb096349
...
...
@@ -567,7 +567,7 @@ def clone(i, o, copy_inputs = True):
:type o: list
:param o: output L{Variable}s
:type copy_inputs: bool
:param copy_inputs: if True, the inputs will be copied (defaults to
Fals
e)
:param copy_inputs: if True, the inputs will be copied (defaults to
Tru
e)
Returns the inputs and outputs of that copy.
"""
...
...
theano/gof/opt.py
浏览文件 @
bb096349
...
...
@@ -75,7 +75,7 @@ class Optimizer(object):
opt.apply(env)
"""
self
.
add_requirements
(
env
)
self
.
apply
(
env
,
*
args
,
**
kwargs
)
return
self
.
apply
(
env
,
*
args
,
**
kwargs
)
def
__call__
(
self
,
env
):
"""WRITEME
...
...
@@ -98,6 +98,12 @@ class Optimizer(object):
print
>>
stream
,
"
%
s
%
s
%
s id=
%
i"
%
(
(
' '
*
level
),
self
.
__class__
.
__name__
,
name
,
id
(
self
))
def
print_profile
(
self
,
prof
):
if
prof
is
not
None
:
raise
NotImplementedError
(
"The function print_profile must be overrided if the"
" optimizer return profiling information."
)
class
FromFunctionOptimizer
(
Optimizer
):
"""WRITEME"""
...
...
@@ -154,12 +160,16 @@ class SeqOptimizer(Optimizer, list):
Applies each L{Optimizer} in self in turn.
"""
l
=
[]
if
env
.
profile
:
validate_before
=
env
.
profile
.
validate_time
nb_node_before
=
len
(
env
.
nodes
)
sub_profs
=
[]
for
optimizer
in
self
:
try
:
t0
=
time
.
time
()
optimizer
.
optimize
(
env
)
sub_prof
=
optimizer
.
optimize
(
env
)
l
.
append
(
float
(
time
.
time
()
-
t0
))
sub_profs
.
append
(
sub_prof
)
except
AssertionError
:
# do not catch Assertion failures
raise
...
...
@@ -169,12 +179,14 @@ class SeqOptimizer(Optimizer, list):
continue
else
:
raise
if
config
.
time_seq_optimizer
:
print
"SeqOptimizer"
,
if
hasattr
(
self
,
"name"
):
print
self
.
name
,
elif
hasattr
(
self
,
"__name__"
):
print
self
.
__name__
,
print
" time
%.3
fs for
%
d/
%
d nodes before/after optimization"
%
(
sum
(
l
),
nb_node_before
,
len
(
env
.
nodes
))
print
" time
%.3
fs for validate "
%
(
env
.
profile
.
validate_time
-
validate_before
)
ll
=
[]
for
opt
in
self
:
if
hasattr
(
opt
,
"__name__"
):
...
...
@@ -191,6 +203,12 @@ class SeqOptimizer(Optimizer, list):
for
(
t
,
opt
)
in
lll
[::
-
1
]:
print
'
%.6
fs -
%
s'
%
(
t
,
opt
)
print
if
env
.
profile
:
validate_time
=
env
.
profile
.
validate_time
-
validate_before
else
:
validate_time
=
None
return
(
self
,
l
,
validate_time
,
nb_node_before
,
len
(
env
.
nodes
),
sub_profs
)
def
__eq__
(
self
,
other
):
#added to override the list's __eq__ implementation
...
...
@@ -216,6 +234,115 @@ class SeqOptimizer(Optimizer, list):
for
opt
in
self
:
opt
.
print_summary
(
stream
,
level
=
(
level
+
2
),
depth
=
depth
)
@staticmethod
def
print_profile
(
stream
,
prof
,
level
=
0
):
(
opts
,
prof
,
validate_time
,
nb_node_before
,
nb_node_after
,
sub_profs
)
=
prof
blanc
=
(
' '
*
level
)
print
>>
stream
,
blanc
,
"SeqOptimizer"
,
if
hasattr
(
opts
,
"name"
):
print
>>
stream
,
blanc
,
opts
.
name
,
elif
hasattr
(
opts
,
"__name__"
):
print
>>
stream
,
blanc
,
opts
.
__name__
,
print
>>
stream
,
(
" time
%.3
fs for
%
d/
%
d nodes"
" before/after optimization"
%
(
sum
(
prof
),
nb_node_before
,
nb_node_after
))
print
>>
stream
,
blanc
,
"
%.3
fs for env.validate()"
%
(
validate_time
)
if
level
==
0
:
print
>>
stream
,
blanc
,
" time - (name, class, index)"
ll
=
[]
for
opt
in
opts
:
if
hasattr
(
opt
,
"__name__"
):
ll
.
append
((
opt
.
__name__
,
opt
.
__class__
.
__name__
,
opts
.
index
(
opt
)))
else
:
ll
.
append
((
opt
.
name
,
opt
.
__class__
.
__name__
,
opts
.
index
(
opt
)))
lll
=
zip
(
prof
,
ll
)
def
cmp
(
a
,
b
):
if
a
[
0
]
==
b
[
0
]:
return
0
elif
a
[
0
]
<
b
[
0
]:
return
-
1
return
1
lll
.
sort
(
cmp
)
for
(
t
,
opt
)
in
lll
[::
-
1
]:
#if t < 1:
# continue
print
>>
stream
,
blanc
,
'
%.6
fs -
%
s'
%
(
t
,
opt
)
if
sub_profs
[
opt
[
-
1
]]:
opts
[
opt
[
-
1
]]
.
print_profile
(
stream
,
sub_profs
[
opt
[
-
1
]],
level
=
level
+
1
)
print
>>
stream
@staticmethod
def
merge_profile
(
prof1
,
prof2
):
"""
Merge 2 profiles returned by this cass apply() fct.
"""
new_t
=
[]
new_l
=
[]
new_sub_profile
=
[]
#merge common(same object) opt
for
l
in
set
(
prof1
[
0
])
.
intersection
(
set
(
prof2
[
0
])):
idx1
=
prof1
[
0
]
.
index
(
l
)
idx2
=
prof2
[
0
]
.
index
(
l
)
new_t
.
append
(
prof1
[
1
][
idx1
]
+
prof2
[
1
][
idx2
])
new_l
.
append
(
l
)
if
hasattr
(
l
,
'merge_profile'
):
assert
len
(
prof1
[
5
][
idx1
])
==
len
(
prof2
[
5
][
idx1
])
new_sub_profile
.
append
(
l
.
merge_profile
(
prof1
[
5
][
idx1
],
prof2
[
5
][
idx2
]))
else
:
new_sub_profile
.
append
(
None
)
# merge not common opt
import
StringIO
for
l
in
set
(
prof1
[
0
])
.
symmetric_difference
(
set
(
prof2
[
0
])):
#The set trick above only work for the same object optimization
#It don't work for equivalent optimization.
#So we try to merge equivalent optimization here.
new_l_names
=
[
o
.
name
for
o
in
new_l
]
if
l
.
name
in
new_l_names
:
idx
=
new_l_names
.
index
(
l
.
name
)
io1
=
StringIO
.
StringIO
()
io2
=
StringIO
.
StringIO
()
l
.
print_summary
(
io1
)
new_l
[
idx
]
.
print_summary
(
io2
)
if
io1
.
read
()
==
io2
.
read
():
if
l
in
prof1
[
0
]:
p
=
prof1
else
:
p
=
prof2
new_t
[
idx
]
+=
p
[
1
][
p
[
0
]
.
index
(
l
)]
if
hasattr
(
l
,
'merge_profile'
):
assert
len
(
p
[
5
][
p
[
0
]
.
index
(
l
)])
==
len
(
new_sub_profile
[
idx
])
new_sub_profile
[
idx
]
=
l
.
merge_profile
(
new_sub_profile
[
idx
],
p
[
5
][
p
[
0
]
.
index
(
l
)])
else
:
new_sub_profile
[
idx
]
=
None
continue
if
l
in
prof1
[
0
]:
p
=
prof1
else
:
p
=
prof2
new_t
.
append
(
p
[
1
][
p
[
0
]
.
index
(
l
)])
idx
=
p
[
0
]
.
index
(
l
)
new_l
.
append
(
l
)
new_sub_profile
.
append
(
p
[
5
][
idx
])
new_opt
=
SeqOptimizer
(
*
new_l
)
assert
set
(
prof1
[
0
])
.
issubset
(
set
(
new_l
))
# assert set(prof2[0]).issubset(set(new_l))
assert
len
(
new_t
)
==
len
(
new_opt
)
==
len
(
new_sub_profile
)
return
(
new_opt
,
new_t
,
prof1
[
2
]
+
prof2
[
2
],
-
1
,
-
1
,
new_sub_profile
)
class
_metadict
:
"""WRITEME"""
...
...
@@ -500,7 +627,9 @@ def MergeOptMerge(opt):
opt introduced additional similarities.
"""
merger
=
merge_optimizer
return
SeqOptimizer
([
merger
,
opt
,
merger
])
opt
=
SeqOptimizer
([
merger
,
opt
,
merger
])
opt
.
name
=
"MergeOptMerge"
return
opt
def
pre_constant_merge
(
vars
):
...
...
@@ -1314,7 +1443,12 @@ class EquilibriumOptimizer(NavigatorOptimizer):
loop_timing
=
[]
global_opt_timing
=
[]
time_lopts
=
{}
io_toposort_timing
=
[]
nb_nodes
=
[]
for
lopt
in
self
.
local_optimizers
:
process_count
.
setdefault
(
lopt
,
0
)
time_lopts
.
setdefault
(
lopt
,
0
)
while
changed
and
not
max_use_abort
:
t0
=
time
.
time
()
...
...
@@ -1333,7 +1467,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
for
node
in
start_from
:
assert
node
in
env
.
outputs
topo_t0
=
time
.
time
()
q
=
deque
(
graph
.
io_toposort
(
env
.
inputs
,
start_from
))
io_toposort_timing
.
append
(
time
.
time
()
-
topo_t0
)
nb_nodes
.
append
(
len
(
q
))
max_nb_nodes
=
max
(
max_nb_nodes
,
len
(
q
))
...
...
@@ -1355,9 +1491,11 @@ class EquilibriumOptimizer(NavigatorOptimizer):
while
q
:
node
=
q
.
pop
()
current_node
=
node
for
lopt
in
self
.
local_optimizers
:
process_count
.
setdefault
(
lopt
,
0
)
t_lopt
=
time
.
time
(
)
lopt_change
=
self
.
process_node
(
env
,
node
,
lopt
)
time_lopts
[
lopt
]
+=
time
.
time
()
-
t_lopt
if
lopt_change
:
process_count
[
lopt
]
+=
1
changed
=
True
...
...
@@ -1402,6 +1540,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
print
'
%
d -
%
s'
%
(
count
,
opt
)
print
return
(
self
,
loop_timing
,
process_count
,
max_nb_nodes
,
global_opt_timing
,
nb_nodes
,
time_lopts
,
io_toposort_timing
)
def
print_summary
(
self
,
stream
=
sys
.
stdout
,
level
=
0
,
depth
=-
1
):
name
=
getattr
(
self
,
'name'
,
None
)
print
>>
stream
,
"
%
s
%
s
%
s id=
%
i"
%
(
...
...
@@ -1411,6 +1552,95 @@ class EquilibriumOptimizer(NavigatorOptimizer):
lopt
.
print_summary
(
stream
,
level
=
(
level
+
2
),
depth
=
(
depth
-
1
))
@staticmethod
def
print_profile
(
stream
,
prof
,
level
=
0
):
(
opt
,
loop_timing
,
process_count
,
max_nb_nodes
,
global_opt_timing
,
nb_nodes
,
time_lopts
,
io_toposort_timing
)
=
prof
blanc
=
(
' '
*
level
)
print
>>
stream
,
blanc
,
"EquilibriumOptimizer"
,
print
>>
stream
,
blanc
,
getattr
(
opt
,
"name"
,
getattr
(
opt
,
"__name__"
,
""
))
print
>>
stream
,
blanc
,
" time
%.3
fs for
%
d passes,
%
d nodes max"
%
(
sum
(
loop_timing
),
len
(
loop_timing
),
max_nb_nodes
)
print
>>
stream
,
blanc
,
" time io_toposort
%.3
fs"
%
sum
(
io_toposort_timing
)
for
i
in
range
(
len
(
loop_timing
)):
print
>>
stream
,
blanc
,
(
'
%
d -
%.3
fs (
%.3
fs in global opts, '
'
%.3
fs io_toposort) -
%
d nodes'
%
(
i
,
loop_timing
[
i
],
global_opt_timing
[
i
],
io_toposort_timing
[
i
],
nb_nodes
[
i
]))
count_opt
=
[]
for
opt
,
count
in
process_count
.
iteritems
():
if
count
>
0
:
count_opt
.
append
((
time_lopts
[
opt
],
count
,
opt
))
if
count_opt
:
print
>>
stream
,
blanc
,
'times applied - optimizer (only those applied):'
count_opt
.
sort
()
for
(
t
,
count
,
opt
)
in
count_opt
[::
-
1
]:
print
>>
stream
,
blanc
,
'
%.3
fs -
%
d -
%
s'
%
(
t
,
count
,
opt
)
print
>>
stream
@staticmethod
def
merge_profile
(
prof1
,
prof2
):
#(opt, loop_timing, process_count, max_nb_nodes,
# global_opt_timing, nb_nodes, time_lopts, io_toposort_timing) = prof1
local_optimizers
=
set
(
prof1
[
0
]
.
local_optimizers
)
.
union
(
prof2
[
0
]
.
local_optimizers
)
global_optimizers
=
set
(
prof1
[
0
]
.
global_optimizers
)
.
union
(
prof2
[
0
]
.
global_optimizers
)
new_opt
=
EquilibriumOptimizer
(
local_optimizers
.
union
(
global_optimizers
),
max_use_ratio
=
1
)
def
merge_list
(
l1
,
l2
):
l
=
copy
.
copy
(
l1
)
for
idx
,
nb
in
enumerate
(
l2
):
if
idx
<
len
(
l
):
l
[
idx
]
+=
nb
else
:
l
.
append
(
nb
)
return
l
loop_timing
=
merge_list
(
prof1
[
1
],
prof2
[
1
])
process_count
=
prof1
[
2
]
.
copy
()
for
process
,
count
in
prof2
[
2
]
.
iteritems
():
if
process
in
process_count
:
process_count
[
process
]
+=
count
else
:
process_count
[
process
]
=
count
max_nb_nodes
=
max
(
prof1
[
3
],
prof2
[
3
])
global_opt_timing
=
merge_list
(
prof1
[
4
],
prof2
[
4
])
nb_nodes
=
merge_list
(
prof1
[
5
],
prof2
[
5
])
time_lopts
=
prof1
[
6
]
.
copy
()
for
opt
,
t
in
prof2
[
6
]
.
iteritems
():
if
opt
in
time_lopts
:
time_lopts
[
opt
]
+=
t
else
:
time_lopts
[
opt
]
=
t
io_toposort_timing
=
merge_list
(
prof1
[
7
],
prof2
[
7
])
assert
(
len
(
loop_timing
)
==
len
(
global_opt_timing
)
==
len
(
io_toposort_timing
)
==
len
(
nb_nodes
))
assert
len
(
loop_timing
)
==
max
(
len
(
prof1
[
1
]),
len
(
prof2
[
1
]))
return
(
new_opt
,
loop_timing
,
process_count
,
max_nb_nodes
,
global_opt_timing
,
nb_nodes
,
time_lopts
,
io_toposort_timing
)
#################
### Utilities ###
...
...
theano/gof/optdb.py
浏览文件 @
bb096349
...
...
@@ -229,7 +229,10 @@ class SequenceDB(DB):
opts
=
[
o
for
o
in
opts
if
self
.
__position__
[
o
.
name
]
<
position_cutoff
]
opts
.
sort
(
key
=
lambda
obj
:
self
.
__position__
[
obj
.
name
])
return
opt
.
SeqOptimizer
(
opts
,
failure_callback
=
self
.
failure_callback
)
ret
=
opt
.
SeqOptimizer
(
opts
,
failure_callback
=
self
.
failure_callback
)
if
hasattr
(
tags
[
0
],
'name'
):
ret
.
name
=
tags
[
0
]
.
name
return
ret
def
print_summary
(
self
,
stream
=
sys
.
stdout
):
print
>>
stream
,
"SequenceDB (id
%
i)"
%
id
(
self
)
...
...
theano/gof/toolbox.py
浏览文件 @
bb096349
import
sys
import
time
from
theano.gof.python25
import
partial
...
...
@@ -71,10 +72,20 @@ class History:
class
Validator
:
def
on_attach
(
self
,
env
):
if
hasattr
(
env
,
'validate'
):
raise
AlreadyThere
(
"Validator feature is already present or in"
" conflict with another plugin."
)
env
.
validate
=
lambda
:
env
.
execute_callbacks
(
'validate'
)
for
attr
in
(
'validate'
,
'validate_time'
):
if
hasattr
(
env
,
attr
):
raise
AlreadyThere
(
"Validator feature is already present or in"
" conflict with another plugin."
)
def
validate
():
t0
=
time
.
time
()
ret
=
env
.
execute_callbacks
(
'validate'
)
t1
=
time
.
time
()
if
env
.
profile
:
env
.
profile
.
validate_time
+=
t1
-
t0
return
ret
env
.
validate
=
validate
def
consistent
():
try
:
...
...
theano/gof/vm.py
浏览文件 @
bb096349
...
...
@@ -17,6 +17,9 @@ logger = logging.getLogger(__name__)
AddConfigVar
(
'profile'
,
"If VM should collect profile information"
,
BoolParam
(
False
))
AddConfigVar
(
'profile_optimizer'
,
"If VM should collect optimizer profile information"
,
BoolParam
(
False
))
raise_with_op
=
link
.
raise_with_op
...
...
theano/sandbox/cuda/tests/test_driver.py
浏览文件 @
bb096349
...
...
@@ -24,7 +24,8 @@ def test_nvidia_driver1():
"""
a
=
numpy
.
random
.
rand
(
10000
)
.
astype
(
"float32"
)
A
=
cuda
.
shared_constructor
(
a
)
f
=
theano
.
function
(
inputs
=
[],
outputs
=
A
.
sum
(),
mode
=
mode_with_gpu
)
f
=
theano
.
function
(
inputs
=
[],
outputs
=
A
.
sum
(),
mode
=
mode_with_gpu
,
profile
=
False
)
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
2
assert
sum
(
isinstance
(
node
.
op
,
B
.
GpuSum
)
for
node
in
topo
)
==
1
...
...
@@ -56,7 +57,8 @@ def test_nvidia_driver3():
of the gpu device
"""
var
=
cuda
.
fvector
()
f
=
theano
.
function
([
var
],
var
+
1
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
var
],
var
+
1
,
mode
=
mode_with_gpu
,
profile
=
False
)
topo
=
f
.
maker
.
env
.
toposort
()
assert
any
([
isinstance
(
node
.
op
,
cuda
.
GpuElemwise
)
for
node
in
topo
])
assert
theano
.
sandbox
.
cuda
.
use
.
device_number
is
not
None
...
...
theano/scan_module/scan.py
浏览文件 @
bb096349
...
...
@@ -794,7 +794,8 @@ def scan(fn,
updates
=
updates
,
mode
=
compile
.
mode
.
Mode
(
linker
=
'py'
,
optimizer
=
None
),
on_unused_input
=
'ignore'
)
on_unused_input
=
'ignore'
,
profile
=
False
)
##
### Step 5. Re-arange inputs of scan into a more strict order
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论