Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
bb096349
提交
bb096349
authored
6月 15, 2012
作者:
lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #666 from nouiz/time_opt
Time opt
上级
b8165faa
0c0c9f91
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
11 个修改的文件
包含
80 行增加
和
16 行删除
+80
-16
function_module.py
theano/compile/function_module.py
+13
-4
mode.py
theano/compile/mode.py
+4
-0
profiling.py
theano/compile/profiling.py
+33
-3
env.py
theano/gof/env.py
+1
-0
graph.py
theano/gof/graph.py
+1
-1
opt.py
theano/gof/opt.py
+0
-0
optdb.py
theano/gof/optdb.py
+4
-1
toolbox.py
theano/gof/toolbox.py
+15
-4
vm.py
theano/gof/vm.py
+3
-0
test_driver.py
theano/sandbox/cuda/tests/test_driver.py
+4
-2
scan.py
theano/scan_module/scan.py
+2
-1
没有找到文件。
theano/compile/function_module.py
浏览文件 @
bb096349
...
...
@@ -994,11 +994,16 @@ class FunctionMaker(object):
# we allow ProfileMode to provide a ProfileStats object
# using this somewhat awkward mechanism.
mode_profile
=
getattr
(
mode
,
'profile'
,
None
)
if
(
profile
is
not
None
)
and
(
mode_profile
is
not
None
):
if
(
profile
is
not
None
and
profile
is
not
False
and
mode_profile
is
not
None
):
raise
TypeError
(
'profile passed via both "mode" and "profile" arguments'
)
self
.
profile
=
profile
=
profile
or
mode_profile
if
profile
:
# We preload the cache here so that its loading time is not
# included in the optimization time of the function being compiled.
theano
.
gof
.
cc
.
get_module_cache
()
# Handle the case where inputs and/or outputs is a single Variable (not in a list)
self
.
orig_outputs
=
outputs
unpack_single
=
False
...
...
@@ -1030,6 +1035,8 @@ class FunctionMaker(object):
# make the env (copies the graph, creates NEW INPUT AND OUTPUT VARIABLES)
env
,
additional_outputs
=
std_env
(
expanded_inputs
,
outputs
,
accept_inplace
)
env
.
profile
=
profile
self
.
env
=
env
# Fetch the optimizer and linker
...
...
@@ -1042,13 +1049,15 @@ class FunctionMaker(object):
theano
.
config
.
compute_test_value
=
"off"
gof
.
Op
.
add_stack_trace_on_call
=
False
start_optimizer
=
time
.
time
()
optimizer
(
env
)
optimizer
_profile
=
optimizer
(
env
)
end_optimizer
=
time
.
time
()
opt_time
=
end_optimizer
-
start_optimizer
mode
.
optimizer_time
+=
opt_time
if
profile
:
profile
.
optimizer_time
+=
opt_time
if
theano
.
config
.
profile_optimizer
:
profile
.
optimizer_profile
=
(
optimizer
,
optimizer_profile
)
_logger
.
debug
(
'Optimizing took
%
f seconds'
,
opt_time
)
#Add deep copy to respect the memory interface
...
...
theano/compile/mode.py
浏览文件 @
bb096349
...
...
@@ -88,6 +88,10 @@ OPT_FAST_RUN_STABLE = OPT_FAST_RUN.requiring('stable')
OPT_FAST_COMPILE
=
gof
.
Query
(
include
=
[
'fast_compile'
])
OPT_STABILIZE
=
gof
.
Query
(
include
=
[
'fast_run'
])
OPT_STABILIZE
.
position_cutoff
=
1.5000001
OPT_FAST_RUN
.
name
=
'OPT_FAST_RUN'
OPT_FAST_RUN_STABLE
.
name
=
'OPT_FAST_RUN_STABLE'
OPT_FAST_COMPILE
.
name
=
'OPT_FAST_COMPILE'
OPT_STABILIZE
.
name
=
'OPT_STABILIZE'
predefined_optimizers
=
{
None
:
(
lambda
env
:
None
),
...
...
theano/compile/profiling.py
浏览文件 @
bb096349
...
...
@@ -38,12 +38,14 @@ AddConfigVar('profiling.time_thunks',
def
_atexit_print_fn
():
"""Print ProfileStat objects in _atexit_print_list to _atexit_print_file
"""
printed
=
0
for
ps
in
_atexit_print_list
:
if
ps
.
fct_callcount
or
ps
.
compile_time
>
0
:
ps
.
summary
(
file
=
_atexit_print_file
)
printed
+=
1
else
:
print
'Skipping empty Profile'
if
len
(
_atexit_print_list
)
>
1
:
if
printed
>
1
:
# Make a global profile
cum
=
copy
.
copy
(
_atexit_print_list
[
0
])
cum
.
message
=
"Sum of all printed profiles at exit"
...
...
@@ -51,14 +53,26 @@ def _atexit_print_fn():
# for ps in [ps for ps in _atexit_print_list[1:]
# if not isinstance(ps, ScanProfileStats)]:
for
attr
in
[
"compile_time"
,
"fct_call_time"
,
"fct_callcount"
,
"vm_call_time"
,
"optimizer_time"
,
"linker_time"
]:
"vm_call_time"
,
"optimizer_time"
,
"linker_time"
,
"validate_time"
]:
setattr
(
cum
,
attr
,
getattr
(
cum
,
attr
)
+
getattr
(
ps
,
attr
))
# merge dictionaries
for
attr
in
[
"apply_time"
,
"apply_callcount"
,
"apply_cimpl"
,
"outputs_size"
]:
cum_attr
=
getattr
(
cum
,
attr
)
for
key
,
val
in
getattr
(
ps
,
attr
)
.
iteritems
():
assert
key
not
in
cum_attr
cum_attr
[
key
]
=
val
if
cum
.
optimizer_profile
and
ps
.
optimizer_profile
:
merge
=
cum
.
optimizer_profile
[
0
]
.
merge_profile
(
cum
.
optimizer_profile
[
1
],
ps
.
optimizer_profile
[
1
])
cum
.
optimizer_profile
=
(
cum
.
optimizer_profile
[
0
],
merge
)
else
:
cum
.
optimizer_profile
=
None
cum
.
summary
(
file
=
_atexit_print_file
)
...
...
@@ -118,11 +132,19 @@ class ProfileStats(object):
optimizer_time
=
0.0
# time spent optimizing graph (FunctionMaker.__init__)
validate_time
=
0.0
# time spent in env.validate
# This is a subset of optimizer_time that is dominated by toposort()
# when the destroymap feature is included.
linker_time
=
0.0
# time spent linking graph (FunctionMaker.create)
line_width
=
140
optimizer_profile
=
None
# None or tuple (the optimizer, the profile it returned)
# param is called flag_time_thunks because most other attributes with time
# in the name are times *of* something, rather than configuration flags.
def
__init__
(
self
,
atexit_print
=
True
,
flag_time_thunks
=
None
,
**
kwargs
):
...
...
@@ -390,11 +412,15 @@ class ProfileStats(object):
local_time
,
100
*
local_time
/
self
.
fct_call_time
)
print
>>
file
,
' Total compile time:
%
es'
%
self
.
compile_time
print
>>
file
,
' Theano Optimizer time:
%
es'
%
self
.
optimizer_time
print
>>
file
,
' Theano validate time:
%
es'
%
self
.
validate_time
print
>>
file
,
(
' Theano Linker time (includes C,'
' CUDA code generation/compiling):
%
es'
%
self
.
linker_time
)
print
>>
file
,
''
# The validation time is a subset of optimizer_time
assert
self
.
validate_time
<
self
.
optimizer_time
def
summary
(
self
,
file
=
sys
.
stderr
,
n_ops_to_print
=
20
,
n_applies_to_print
=
20
):
self
.
summary_function
(
file
)
...
...
@@ -402,9 +428,13 @@ class ProfileStats(object):
if
local_time
>
0
:
self
.
summary_ops
(
file
,
n_ops_to_print
)
self
.
summary_nodes
(
file
,
n_applies_to_print
)
el
se
:
el
if
self
.
fct_callcount
>
0
:
print
>>
file
,
(
" No node time accumulated "
"(hint: try config profiling.time_thunks=1)"
)
if
self
.
optimizer_profile
:
print
"Optimizer Profile"
print
"-----------------"
self
.
optimizer_profile
[
0
]
.
print_profile
(
file
,
self
.
optimizer_profile
[
1
])
if
0
:
# old code still to be ported from ProfileMode
...
...
theano/gof/env.py
浏览文件 @
bb096349
...
...
@@ -129,6 +129,7 @@ class Env(utils.object2):
self
.
node_locks
=
{}
self
.
variable_locks
=
{}
self
.
profile
=
None
### Setup a Variable ###
...
...
theano/gof/graph.py
浏览文件 @
bb096349
...
...
@@ -567,7 +567,7 @@ def clone(i, o, copy_inputs = True):
:type o: list
:param o: output L{Variable}s
:type copy_inputs: bool
:param copy_inputs: if True, the inputs will be copied (defaults to
Fals
e)
:param copy_inputs: if True, the inputs will be copied (defaults to
Tru
e)
Returns the inputs and outputs of that copy.
"""
...
...
theano/gof/opt.py
浏览文件 @
bb096349
差异被折叠。
点击展开。
theano/gof/optdb.py
浏览文件 @
bb096349
...
...
@@ -229,7 +229,10 @@ class SequenceDB(DB):
opts
=
[
o
for
o
in
opts
if
self
.
__position__
[
o
.
name
]
<
position_cutoff
]
opts
.
sort
(
key
=
lambda
obj
:
self
.
__position__
[
obj
.
name
])
return
opt
.
SeqOptimizer
(
opts
,
failure_callback
=
self
.
failure_callback
)
ret
=
opt
.
SeqOptimizer
(
opts
,
failure_callback
=
self
.
failure_callback
)
if
hasattr
(
tags
[
0
],
'name'
):
ret
.
name
=
tags
[
0
]
.
name
return
ret
def
print_summary
(
self
,
stream
=
sys
.
stdout
):
print
>>
stream
,
"SequenceDB (id
%
i)"
%
id
(
self
)
...
...
theano/gof/toolbox.py
浏览文件 @
bb096349
import
sys
import
time
from
theano.gof.python25
import
partial
...
...
@@ -71,10 +72,20 @@ class History:
class
Validator
:
def
on_attach
(
self
,
env
):
if
hasattr
(
env
,
'validate'
):
raise
AlreadyThere
(
"Validator feature is already present or in"
" conflict with another plugin."
)
env
.
validate
=
lambda
:
env
.
execute_callbacks
(
'validate'
)
for
attr
in
(
'validate'
,
'validate_time'
):
if
hasattr
(
env
,
attr
):
raise
AlreadyThere
(
"Validator feature is already present or in"
" conflict with another plugin."
)
def
validate
():
t0
=
time
.
time
()
ret
=
env
.
execute_callbacks
(
'validate'
)
t1
=
time
.
time
()
if
env
.
profile
:
env
.
profile
.
validate_time
+=
t1
-
t0
return
ret
env
.
validate
=
validate
def
consistent
():
try
:
...
...
theano/gof/vm.py
浏览文件 @
bb096349
...
...
@@ -17,6 +17,9 @@ logger = logging.getLogger(__name__)
AddConfigVar
(
'profile'
,
"If VM should collect profile information"
,
BoolParam
(
False
))
AddConfigVar
(
'profile_optimizer'
,
"If VM should collect optimizer profile information"
,
BoolParam
(
False
))
raise_with_op
=
link
.
raise_with_op
...
...
theano/sandbox/cuda/tests/test_driver.py
浏览文件 @
bb096349
...
...
@@ -24,7 +24,8 @@ def test_nvidia_driver1():
"""
a
=
numpy
.
random
.
rand
(
10000
)
.
astype
(
"float32"
)
A
=
cuda
.
shared_constructor
(
a
)
f
=
theano
.
function
(
inputs
=
[],
outputs
=
A
.
sum
(),
mode
=
mode_with_gpu
)
f
=
theano
.
function
(
inputs
=
[],
outputs
=
A
.
sum
(),
mode
=
mode_with_gpu
,
profile
=
False
)
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
2
assert
sum
(
isinstance
(
node
.
op
,
B
.
GpuSum
)
for
node
in
topo
)
==
1
...
...
@@ -56,7 +57,8 @@ def test_nvidia_driver3():
of the gpu device
"""
var
=
cuda
.
fvector
()
f
=
theano
.
function
([
var
],
var
+
1
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
var
],
var
+
1
,
mode
=
mode_with_gpu
,
profile
=
False
)
topo
=
f
.
maker
.
env
.
toposort
()
assert
any
([
isinstance
(
node
.
op
,
cuda
.
GpuElemwise
)
for
node
in
topo
])
assert
theano
.
sandbox
.
cuda
.
use
.
device_number
is
not
None
...
...
theano/scan_module/scan.py
浏览文件 @
bb096349
...
...
@@ -794,7 +794,8 @@ def scan(fn,
updates
=
updates
,
mode
=
compile
.
mode
.
Mode
(
linker
=
'py'
,
optimizer
=
None
),
on_unused_input
=
'ignore'
)
on_unused_input
=
'ignore'
,
profile
=
False
)
##
### Step 5. Re-arrange inputs of scan into a stricter order
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论