Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
671a821d
Unverified
提交
671a821d
authored
11月 26, 2022
作者:
Thomas Wiecki
提交者:
GitHub
11月 26, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Downstream 1312 (#34)
* Replace time.time with time.perf_counter
Co-authored-by: redbopo <redbopo.lan@gmail.com>
上级
4deacacd
隐藏空白字符变更
内嵌
并排
正在显示
22 个修改的文件
包含
144 行增加
和
142 行删除
+144
-142
profiling.py
pytensor/compile/profiling.py
+5
-5
gradient.py
pytensor/gradient.py
+2
-2
features.py
pytensor/graph/features.py
+2
-2
fg.py
pytensor/graph/fg.py
+4
-4
basic.py
pytensor/graph/rewriting/basic.py
+27
-27
cmodule.py
pytensor/link/c/cmodule.py
+11
-9
vm.py
pytensor/link/vm.py
+9
-9
check_blas.py
pytensor/misc/check_blas.py
+2
-2
elemwise_time_test.py
pytensor/misc/elemwise_time_test.py
+2
-2
latence_gpu_transfert.py
pytensor/misc/latence_gpu_transfert.py
+4
-4
op.py
pytensor/scan/op.py
+4
-4
blas.py
pytensor/tensor/blas.py
+7
-7
elemwise.py
pytensor/tensor/rewriting/elemwise.py
+2
-2
test_vm.py
tests/link/test_vm.py
+12
-12
test_rng_mrg.py
tests/sandbox/test_rng_mrg.py
+4
-4
test_sp.py
tests/sparse/sandbox/test_sp.py
+4
-4
test_basic.py
tests/sparse/test_basic.py
+6
-6
speed_test_conv.py
tests/tensor/nnet/speed_test_conv.py
+9
-9
test_conv.py
tests/tensor/nnet/test_conv.py
+2
-2
test_conv3d2d.py
tests/tensor/nnet/test_conv3d2d.py
+12
-12
test_math.py
tests/tensor/rewriting/test_math.py
+12
-12
test_gc.py
tests/tensor/test_gc.py
+2
-2
没有找到文件。
pytensor/compile/profiling.py
浏览文件 @
671a821d
...
...
@@ -43,7 +43,7 @@ def extended_open(filename, mode="r"):
logger
=
logging
.
getLogger
(
"pytensor.compile.profiling"
)
pytensor_imported_time
:
float
=
time
.
time
()
pytensor_imported_time
:
float
=
time
.
perf_counter
()
total_fct_exec_time
:
float
=
0.0
total_graph_rewrite_time
:
float
=
0.0
total_time_linker
:
float
=
0.0
...
...
@@ -165,7 +165,7 @@ def print_global_stats():
print
(
(
"Global stats: "
,
f
"Time elasped since PyTensor import = {time.
time
() - pytensor_imported_time:6.3f}s, "
f
"Time elasped since PyTensor import = {time.
perf_counter
() - pytensor_imported_time:6.3f}s, "
f
"Time spent in PyTensor functions = {total_fct_exec_time:6.3f}s, "
"Time spent compiling PyTensor functions: "
f
"rewriting = {total_graph_rewrite_time:6.3f}s, linking = {total_time_linker:6.3f}s "
,
...
...
@@ -831,7 +831,7 @@ class ProfileStats:
f
"Time in all call to pytensor.grad() {pytensor.gradient.grad_time:e}s"
,
file
=
file
,
)
total_time
=
time
.
time
()
-
pytensor_imported_time
total_time
=
time
.
perf_counter
()
-
pytensor_imported_time
print
(
f
"Time since pytensor import {total_time:.3f}s"
,
file
=
file
)
def
summary_memory
(
self
,
file
,
N
=
None
):
...
...
@@ -1299,9 +1299,9 @@ class ProfileStats:
# Config: whether print min memory peak
if
config
.
profiling__min_peak_memory
:
node_list
=
fgraph
.
apply_nodes
ttt
=
time
.
time
()
ttt
=
time
.
perf_counter
()
min_peak
=
count_minimum_peak
(
node_list
,
fgraph
,
nodes_mem
)
min_peak_time
+=
time
.
time
()
-
ttt
min_peak_time
+=
time
.
perf_counter
()
-
ttt
min_max_peak
=
max
(
min_max_peak
,
min_peak
)
del
fgraph
,
nodes_mem
...
...
pytensor/gradient.py
浏览文件 @
671a821d
...
...
@@ -492,7 +492,7 @@ def grad(
respect to the output, then a zero variable is returned.
"""
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
if
cost
is
None
:
if
known_grads
is
None
:
...
...
@@ -643,7 +643,7 @@ def grad(
else
:
assert
return_disconnected
.
lower
()
==
"disconnected"
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
global
grad_time
grad_time
+=
t1
-
t0
...
...
pytensor/graph/features.py
浏览文件 @
671a821d
...
...
@@ -473,7 +473,7 @@ class Validator(Feature):
exception. replace_all_validate will print out the
verbose output. Or it has to be done here before raise.
"""
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
try
:
ret
=
fgraph
.
execute_callbacks
(
"validate"
)
except
Exception
as
e
:
...
...
@@ -494,7 +494,7 @@ class Validator(Feature):
reason
=
uf_info
.
function
print
(
f
"validate failed on node {r}.
\n
Reason: {reason}, {e}"
)
raise
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
if
fgraph
.
profile
:
fgraph
.
profile
.
validate_time
+=
t1
-
t0
return
ret
...
...
pytensor/graph/fg.py
浏览文件 @
671a821d
...
...
@@ -717,7 +717,7 @@ class FunctionGraph(MetaObject):
a method called after name.
"""
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
for
feature
in
self
.
_features
:
try
:
fn
=
getattr
(
feature
,
name
)
...
...
@@ -726,10 +726,10 @@ class FunctionGraph(MetaObject):
# try; the AttributeError really must come from feature.${name}
# not existing
continue
tf0
=
time
.
time
()
tf0
=
time
.
perf_counter
()
fn
(
self
,
*
args
,
**
kwargs
)
self
.
execute_callbacks_times
[
feature
]
+=
time
.
time
()
-
tf0
self
.
execute_callbacks_time
+=
time
.
time
()
-
t0
self
.
execute_callbacks_times
[
feature
]
+=
time
.
perf_counter
()
-
tf0
self
.
execute_callbacks_time
+=
time
.
perf_counter
()
-
t0
def
collect_callbacks
(
self
,
name
:
str
,
*
args
)
->
Dict
[
Feature
,
Any
]:
"""Collects callbacks
...
...
pytensor/graph/rewriting/basic.py
浏览文件 @
671a821d
...
...
@@ -298,9 +298,9 @@ class SequentialGraphRewriter(GraphRewriter, UserList):
for
rewriter
in
self
.
data
:
try
:
nb_nodes_before
=
len
(
fgraph
.
apply_nodes
)
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
sub_prof
=
rewriter
.
apply
(
fgraph
)
l
.
append
(
float
(
time
.
time
()
-
t0
))
l
.
append
(
float
(
time
.
perf_counter
()
-
t0
))
sub_profs
.
append
(
sub_prof
)
nb_nodes
.
append
((
nb_nodes_before
,
len
(
fgraph
.
apply_nodes
)))
if
fgraph
.
profile
:
...
...
@@ -701,7 +701,7 @@ class MergeOptimizer(GraphRewriter):
def
apply
(
self
,
fgraph
):
sched
=
fgraph
.
merge_feature
.
scheduled
nb_fail
=
0
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
if
fgraph
.
profile
:
validate_before
=
fgraph
.
profile
.
validate_time
callback_before
=
fgraph
.
execute_callbacks_time
...
...
@@ -807,7 +807,7 @@ class MergeOptimizer(GraphRewriter):
return
(
nb_fail
,
time
.
time
()
-
t0
,
time
.
perf_counter
()
-
t0
,
validate_time
,
callback_time
,
callbacks_time
,
...
...
@@ -1066,9 +1066,9 @@ class MetaNodeRewriter(NodeRewriter):
return
self
.
track_dict
[
type
(
node
.
op
)]
def
time_call
(
self
,
fn
):
start
=
time
.
time
()
start
=
time
.
perf_counter
()
fn
()
return
time
.
time
()
-
start
return
time
.
perf_counter
()
-
start
class
FromFunctionNodeRewriter
(
NodeRewriter
):
...
...
@@ -1303,9 +1303,9 @@ class SequentialNodeRewriter(NodeRewriter):
new_repl
=
None
for
rewrite
in
rewrites
:
rewrite_start
=
time
.
time
()
rewrite_start
=
time
.
perf_counter
()
new_repl
=
rewrite
.
transform
(
fgraph
,
node
)
rewrite_finish
=
time
.
time
()
rewrite_finish
=
time
.
perf_counter
()
if
self
.
profile
:
self
.
time_rewrites
[
rewrite
]
+=
rewrite_start
-
rewrite_finish
self
.
process_count
[
rewrite
]
+=
1
...
...
@@ -2026,9 +2026,9 @@ class WalkingGraphRewriter(NodeProcessingGraphRewriter):
start_from
=
fgraph
.
outputs
callback_before
=
fgraph
.
execute_callbacks_time
nb_nodes_start
=
len
(
fgraph
.
apply_nodes
)
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
q
=
deque
(
io_toposort
(
fgraph
.
inputs
,
start_from
))
io_t
=
time
.
time
()
-
t0
io_t
=
time
.
perf_counter
()
-
t0
def
importer
(
node
):
if
node
is
not
current_node
:
...
...
@@ -2039,7 +2039,7 @@ class WalkingGraphRewriter(NodeProcessingGraphRewriter):
)
nb
=
0
try
:
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
while
q
:
if
self
.
order
==
"out_to_in"
:
node
=
q
.
pop
()
...
...
@@ -2049,7 +2049,7 @@ class WalkingGraphRewriter(NodeProcessingGraphRewriter):
continue
current_node
=
node
nb
+=
self
.
process_node
(
fgraph
,
node
)
loop_t
=
time
.
time
()
-
t0
loop_t
=
time
.
perf_counter
()
-
t0
finally
:
self
.
detach_updater
(
fgraph
,
u
)
...
...
@@ -2367,9 +2367,9 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
for
crewriter
in
self
.
cleanup_rewriters
:
change_tracker
.
reset
()
nb
=
change_tracker
.
nb_imported
t_rewrite
=
time
.
time
()
t_rewrite
=
time
.
perf_counter
()
sub_prof
=
crewriter
.
apply
(
fgraph
)
time_rewriters
[
crewriter
]
+=
time
.
time
()
-
t_rewrite
time_rewriters
[
crewriter
]
+=
time
.
perf_counter
()
-
t_rewrite
profs_dict
[
crewriter
]
.
append
(
sub_prof
)
if
change_tracker
.
changed
:
process_count
.
setdefault
(
crewriter
,
0
)
...
...
@@ -2381,7 +2381,7 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
while
changed
and
not
max_use_abort
:
process_count
=
{}
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
changed
=
False
iter_cleanup_sub_profs
=
{}
for
crewrite
in
self
.
cleanup_rewriters
:
...
...
@@ -2392,9 +2392,9 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
for
grewrite
in
self
.
global_rewriters
:
change_tracker
.
reset
()
nb
=
change_tracker
.
nb_imported
t_rewrite
=
time
.
time
()
t_rewrite
=
time
.
perf_counter
()
sub_prof
=
grewrite
.
apply
(
fgraph
)
time_rewriters
[
grewrite
]
+=
time
.
time
()
-
t_rewrite
time_rewriters
[
grewrite
]
+=
time
.
perf_counter
()
-
t_rewrite
sub_profs
.
append
(
sub_prof
)
if
change_tracker
.
changed
:
process_count
.
setdefault
(
grewrite
,
0
)
...
...
@@ -2409,13 +2409,13 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
)
global_sub_profs
.
append
(
sub_profs
)
global_rewriter_timing
.
append
(
float
(
time
.
time
()
-
t0
))
global_rewriter_timing
.
append
(
float
(
time
.
perf_counter
()
-
t0
))
changed
|=
apply_cleanup
(
iter_cleanup_sub_profs
)
topo_t0
=
time
.
time
()
topo_t0
=
time
.
perf_counter
()
q
=
deque
(
io_toposort
(
fgraph
.
inputs
,
start_from
))
io_toposort_timing
.
append
(
time
.
time
()
-
topo_t0
)
io_toposort_timing
.
append
(
time
.
perf_counter
()
-
topo_t0
)
nb_nodes
.
append
(
len
(
q
))
max_nb_nodes
=
max
(
max_nb_nodes
,
len
(
q
))
...
...
@@ -2443,11 +2443,11 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
current_node
=
node
for
node_rewriter
in
self
.
node_tracker
.
get_trackers
(
node
.
op
):
nb
=
change_tracker
.
nb_imported
t_rewrite
=
time
.
time
()
t_rewrite
=
time
.
perf_counter
()
node_rewriter_change
=
self
.
process_node
(
fgraph
,
node
,
node_rewriter
)
time_rewriters
[
node_rewriter
]
+=
time
.
time
()
-
t_rewrite
time_rewriters
[
node_rewriter
]
+=
time
.
perf_counter
()
-
t_rewrite
if
not
node_rewriter_change
:
continue
process_count
.
setdefault
(
node_rewriter
,
0
)
...
...
@@ -2469,13 +2469,13 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
# Apply final rewriters
sub_profs
=
[]
t_before_final_rewrites
=
time
.
time
()
t_before_final_rewrites
=
time
.
perf_counter
()
for
grewrite
in
self
.
final_rewriters
:
change_tracker
.
reset
()
nb
=
change_tracker
.
nb_imported
t_rewrite
=
time
.
time
()
t_rewrite
=
time
.
perf_counter
()
sub_prof
=
grewrite
.
apply
(
fgraph
)
time_rewriters
[
grewrite
]
+=
time
.
time
()
-
t_rewrite
time_rewriters
[
grewrite
]
+=
time
.
perf_counter
()
-
t_rewrite
sub_profs
.
append
(
sub_prof
)
if
change_tracker
.
changed
:
process_count
.
setdefault
(
grewrite
,
0
)
...
...
@@ -2490,7 +2490,7 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
)
final_sub_profs
.
append
(
sub_profs
)
global_rewriter_timing
[
-
1
]
+=
time
.
time
()
-
t_before_final_rewrites
global_rewriter_timing
[
-
1
]
+=
time
.
perf_counter
()
-
t_before_final_rewrites
changed
|=
apply_cleanup
(
iter_cleanup_sub_profs
)
...
...
@@ -2504,7 +2504,7 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
cleanup_sub_profs
.
append
(
c_sub_profs
)
loop_process_count
.
append
(
process_count
)
loop_timing
.
append
(
float
(
time
.
time
()
-
t0
))
loop_timing
.
append
(
float
(
time
.
perf_counter
()
-
t0
))
end_nb_nodes
=
len
(
fgraph
.
apply_nodes
)
...
...
pytensor/link/c/cmodule.py
浏览文件 @
671a821d
...
...
@@ -326,11 +326,11 @@ def dlimport(fullpath, suffix=None):
global
import_time
try
:
importlib
.
invalidate_caches
()
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
with
warnings
.
catch_warnings
():
warnings
.
filterwarnings
(
"ignore"
,
message
=
"numpy.ndarray size changed"
)
rval
=
__import__
(
module_name
,
{},
{},
[
module_name
])
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
import_time
+=
t1
-
t0
if
not
rval
:
raise
Exception
(
"__import__ failed"
,
fullpath
)
...
...
@@ -771,7 +771,7 @@ class ModuleCache:
"""
if
age_thresh_use
is
None
:
age_thresh_use
=
self
.
age_thresh_use
start_time
=
time
.
time
()
start_time
=
time
.
perf_counter
()
too_old_to_use
=
[]
to_delete
=
[]
...
...
@@ -786,7 +786,7 @@ class ModuleCache:
to_delete_empty
.
append
((
args
,
kwargs
))
# add entries that are not in the entry_from_key dictionary
time_now
=
time
.
time
()
time_now
=
time
.
perf_counter
()
# Go through directories in alphabetical order to ensure consistent
# behavior.
try
:
...
...
@@ -956,7 +956,7 @@ class ModuleCache:
# directories in alphabetical order so as to make
# sure all new processes only use the first one.
if
cleanup
:
age
=
time
.
time
()
-
last_access_time
(
entry
)
age
=
time
.
perf_counter
()
-
last_access_time
(
entry
)
if
delete_if_problem
or
age
>
self
.
age_thresh_del
:
rmtree
(
root
,
...
...
@@ -1063,7 +1063,9 @@ class ModuleCache:
if
not
files
:
_rmtree
(
*
a
,
**
kw
)
_logger
.
debug
(
f
"Time needed to refresh cache: {time.time() - start_time}"
)
_logger
.
debug
(
f
"Time needed to refresh cache: {time.perf_counter() - start_time}"
)
return
too_old_to_use
...
...
@@ -1269,7 +1271,7 @@ class ModuleCache:
Its associated pickled file containing a KeyData.
"""
start_time
=
time
.
time
()
start_time
=
time
.
perf_counter
()
# Verify that when we reload the KeyData from the pickled file, the
# same key can be found in it, and is not equal to more than one
# other key.
...
...
@@ -1317,7 +1319,7 @@ class ModuleCache:
f
"The keys are:
\n
{other}
\n
and
\n
{key}
\n
(found in {key_pkl})."
)
self
.
time_spent_in_check_key
+=
time
.
time
()
-
start_time
self
.
time_spent_in_check_key
+=
time
.
perf_counter
()
-
start_time
# default 31 days
age_thresh_del
=
config
.
cmodule__age_thresh_use
+
60
*
60
*
24
*
7
...
...
@@ -1506,7 +1508,7 @@ class ModuleCache:
assert
key
[
0
]
to_del
=
[]
time_now
=
time
.
time
()
time_now
=
time
.
perf_counter
()
for
filename
in
os
.
listdir
(
self
.
dirname
):
if
filename
.
startswith
(
"tmp"
):
try
:
...
...
pytensor/link/vm.py
浏览文件 @
671a821d
...
...
@@ -394,9 +394,9 @@ class Loop(UpdatingVM):
for
thunk
,
node
,
old_storage
in
zip_longest
(
self
.
thunks
,
self
.
nodes
,
self
.
post_thunk_clear
,
fillvalue
=
()
):
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
thunk
()
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
self
.
call_counts
[
i
]
+=
1
self
.
call_times
[
i
]
+=
t1
-
t0
for
old_s
in
old_storage
:
...
...
@@ -515,15 +515,15 @@ class Stack(UpdatingVM):
"""
idx
=
self
.
node_idx
[
node
]
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
rval
=
self
.
thunks
[
idx
]()
self
.
node_executed_order
.
append
(
node
)
# Some thunks on some computers run faster than the granularity
# of the time.
time
clock.
# of the time.
perf_counter
clock.
# Profile output looks buggy if a node has run but takes 0 time.
# (and profile code might hide real bugs if it rounds up 0)
dt
=
max
(
time
.
time
()
-
t0
,
1e-10
)
dt
=
max
(
time
.
perf_counter
()
-
t0
,
1e-10
)
if
self
.
callback
is
not
None
:
self
.
callback
(
node
=
node
,
...
...
@@ -1231,21 +1231,21 @@ class VMLinker(LocalLinker):
thunks
=
[]
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
linker_make_thunk_time
=
{}
impl
=
None
if
self
.
c_thunks
is
False
:
impl
=
"py"
for
node
in
order
:
try
:
thunk_start
=
time
.
time
()
thunk_start
=
time
.
perf_counter
()
# no-recycling is done at each VM.__call__ So there is
# no need to cause duplicate c code by passing
# no_recycling here.
thunks
.
append
(
node
.
op
.
make_thunk
(
node
,
storage_map
,
compute_map
,
[],
impl
=
impl
)
)
linker_make_thunk_time
[
node
]
=
time
.
time
()
-
thunk_start
linker_make_thunk_time
[
node
]
=
time
.
perf_counter
()
-
thunk_start
if
not
hasattr
(
thunks
[
-
1
],
"lazy"
):
# We don't want all ops maker to think about lazy Ops.
# So if they didn't specify that its lazy or not, it isn't.
...
...
@@ -1254,7 +1254,7 @@ class VMLinker(LocalLinker):
except
Exception
:
raise_with_op
(
fgraph
,
node
)
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
if
self
.
profile
:
self
.
profile
.
linker_node_make_thunks
+=
t1
-
t0
...
...
pytensor/misc/check_blas.py
浏览文件 @
671a821d
...
...
@@ -82,12 +82,12 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, iters=10, order=
if
sync
:
# Make sure we don't include the time from the first call
c
.
get_value
(
borrow
=
True
,
return_internal_type
=
True
)
.
sync
()
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
for
i
in
range
(
iters
):
f
()
if
sync
:
c
.
get_value
(
borrow
=
True
,
return_internal_type
=
True
)
.
sync
()
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
return
t1
-
t0
,
impl
...
...
pytensor/misc/elemwise_time_test.py
浏览文件 @
671a821d
...
...
@@ -34,9 +34,9 @@ parser.add_option(
def
evalTime
(
f
,
v
,
script
=
False
,
loops
=
1000
):
min
=
1e10
for
i
in
range
(
0
,
loops
):
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
f
(
v
)
dt
=
time
.
time
()
-
t0
dt
=
time
.
perf_counter
()
-
t0
min
=
dt
if
dt
<
min
else
min
if
not
script
:
print
(
f
" run time in {int(loops)} loops was {min:2.9f} sec"
)
...
...
pytensor/misc/latence_gpu_transfert.py
浏览文件 @
671a821d
...
...
@@ -13,12 +13,12 @@ print(f1.maker.fgraph.toposort())
print
(
f2
.
maker
.
fgraph
.
toposort
())
for
i
in
(
1
,
10
,
100
,
1000
,
10000
,
100000
,
1000000
,
10000000
):
o
=
np
.
zeros
(
i
,
dtype
=
"float32"
)
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
f1
(
o
)
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
tf1
=
t1
-
t0
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
f2
()
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
print
(
"
%8
i
%6.1
f ns
%7.1
f ns"
%
(
i
,
tf1
*
1e6
,
(
t1
-
t0
)
*
1e6
))
pytensor/scan/op.py
浏览文件 @
671a821d
...
...
@@ -1721,7 +1721,7 @@ class Scan(Op, ScanMethodsMixin, HasInnerGraph):
"""
info
=
self
.
info
# 1. Unzip the number of steps and sequences.
t0_call
=
time
.
time
()
t0_call
=
time
.
perf_counter
()
t_fn
=
0
n_steps
=
inputs
[
0
]
seqs
=
[]
...
...
@@ -1942,7 +1942,7 @@ class Scan(Op, ScanMethodsMixin, HasInnerGraph):
old_mitmot_input_data
[
idx
]
=
var
.
data
# 5.1 compute outputs
t0_fn
=
time
.
time
()
t0_fn
=
time
.
perf_counter
()
try
:
vm
()
...
...
@@ -1970,7 +1970,7 @@ class Scan(Op, ScanMethodsMixin, HasInnerGraph):
# old-style linkers raise their own exceptions
raise
dt_fn
=
time
.
time
()
-
t0_fn
dt_fn
=
time
.
perf_counter
()
-
t0_fn
if
info
.
as_while
:
pdx
=
offset
+
info
.
n_shared_outs
cond
=
inner_output_storage
[
pdx
]
.
storage
[
0
]
==
0
...
...
@@ -2196,7 +2196,7 @@ class Scan(Op, ScanMethodsMixin, HasInnerGraph):
for
o_s
in
inner_output_storage
:
o_s
.
storage
[
0
]
=
None
t_call
=
time
.
time
()
-
t0_call
t_call
=
time
.
perf_counter
()
-
t0_call
# NOTE: make this match what's in function.types.Function
# and this little string helps us to find this spot:
# "PROFILE_CODE"
...
...
pytensor/tensor/blas.py
浏览文件 @
671a821d
...
...
@@ -530,7 +530,7 @@ class GemmRelated(COp):
#ifndef MOD
#define MOD
%
#endif
static double time_time() // a time function like time.
time
()
static double time_time() // a time function like time.
perf_counter
()
{
struct timeval tv;
gettimeofday(&tv, 0);
...
...
@@ -1488,15 +1488,15 @@ def _gemm_from_node2(fgraph, node):
"""
lst
=
[]
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
_gemm_canonicalize
(
fgraph
,
node
.
outputs
[
0
],
1.0
,
lst
,
0
)
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
if
len
(
lst
)
>
1
:
lst
=
_factor_canonicalized
(
lst
)
t2
=
time
.
time
()
t2
=
time
.
perf_counter
()
rval
=
_gemm_from_factored_list
(
fgraph
,
lst
)
t3
=
time
.
time
()
t3
=
time
.
perf_counter
()
# It can happen that _factor_canonicalized and
# _gemm_from_factored_list return a node with an incorrect
...
...
@@ -1549,9 +1549,9 @@ class GemmOptimizer(GraphRewriter):
fgraph
.
attach_feature
(
u
)
while
did_something
:
nb_iter
+=
1
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
nodelist
=
pytensor
.
graph
.
basic
.
io_toposort
(
fgraph
.
inputs
,
fgraph
.
outputs
)
time_toposort
+=
time
.
time
()
-
t0
time_toposort
+=
time
.
perf_counter
()
-
t0
did_something
=
False
nodelist
.
reverse
()
for
node
in
nodelist
:
...
...
pytensor/tensor/rewriting/elemwise.py
浏览文件 @
671a821d
...
...
@@ -837,9 +837,9 @@ class FusionOptimizer(GraphRewriter):
callbacks_before
=
fgraph
.
execute_callbacks_times
.
copy
()
callback_before
=
fgraph
.
execute_callbacks_time
while
did_something
:
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
nodelist
=
list
(
fgraph
.
toposort
())
time_toposort
+=
time
.
time
()
-
t0
time_toposort
+=
time
.
perf_counter
()
-
t0
nodelist
.
reverse
()
did_something
=
False
for
node
in
nodelist
:
...
...
tests/link/test_vm.py
浏览文件 @
671a821d
...
...
@@ -113,12 +113,12 @@ def test_speed():
x
=
np
.
asarray
([
2.0
,
3.0
],
dtype
=
config
.
floatX
)
numpy_version
(
x
,
steps_a
)
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
# print numpy_version(x, steps_a)
t1
=
time
.
time
()
t2
=
time
.
time
()
t1
=
time
.
perf_counter
()
t2
=
time
.
perf_counter
()
# print numpy_version(x, steps_b)
t3
=
time
.
time
()
t3
=
time
.
perf_counter
()
t_a
=
t1
-
t0
t_b
=
t3
-
t2
...
...
@@ -135,15 +135,15 @@ def test_speed():
f_b
=
function
([
x
],
b
,
mode
=
Mode
(
optimizer
=
None
,
linker
=
linker
()))
f_a
([
2.0
,
3.0
])
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
f_a
([
2.0
,
3.0
])
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
f_b
([
2.0
,
3.0
])
t2
=
time
.
time
()
t2
=
time
.
perf_counter
()
f_b
([
2.0
,
3.0
])
t3
=
time
.
time
()
t3
=
time
.
perf_counter
()
t_a
=
t1
-
t0
t_b
=
t3
-
t2
...
...
@@ -185,15 +185,15 @@ def test_speed_lazy(linker):
f_b
=
function
([
x
],
b
,
mode
=
Mode
(
optimizer
=
None
,
linker
=
linker
))
f_a
([
2.0
])
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
f_a
([
2.0
])
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
f_b
([
2.0
])
t2
=
time
.
time
()
t2
=
time
.
perf_counter
()
f_b
([
2.0
])
t3
=
time
.
time
()
t3
=
time
.
perf_counter
()
t_a
=
t1
-
t0
t_b
=
t3
-
t2
...
...
tests/sandbox/test_rng_mrg.py
浏览文件 @
671a821d
...
...
@@ -199,10 +199,10 @@ def check_basics(
avg_var
=
0.0
for
i
in
range
(
steps
):
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
ival
=
f
(
*
inputs
)
assert
ival
.
shape
==
sample_size
dt
+=
time
.
time
()
-
t0
dt
+=
time
.
perf_counter
()
-
t0
ival
=
np
.
asarray
(
ival
)
if
i
==
0
:
mean
=
np
.
array
(
ival
,
copy
=
True
)
...
...
@@ -733,11 +733,11 @@ def basic_multinomialtest(
avg_pvals
=
np
.
zeros
(
target_pvals
.
shape
,
dtype
=
config
.
floatX
)
for
i
in
range
(
steps
):
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
ival
=
f
()
assert
ival
.
shape
==
sample_size
assert
np
.
all
(
np
.
sum
(
ival
,
axis
=
1
)
==
n_samples
)
dt
+=
time
.
time
()
-
t0
dt
+=
time
.
perf_counter
()
-
t0
avg_pvals
+=
ival
avg_pvals
/=
steps
*
n_samples
...
...
tests/sparse/sandbox/test_sp.py
浏览文件 @
671a821d
...
...
@@ -64,14 +64,14 @@ class TestSP:
fulloutshp
=
np
.
array
(
imshp
)
-
np
.
array
(
kshp
)
+
1
else
:
fulloutshp
=
np
.
array
(
imshp
)
+
np
.
array
(
kshp
)
-
1
ntime1
=
time
.
time
()
ntime1
=
time
.
perf_counter
()
refout
=
np
.
zeros
((
bsize
,)
+
tuple
(
fulloutshp
)
+
(
nkern
,))
for
b
in
range
(
bsize
):
for
n
in
range
(
nkern
):
refout
[
b
,
...
,
n
]
=
convolve2d
(
img2d
[
b
,
:,
:],
filtersflipped
[
n
,
...
],
conv_mode
)
ntot
+=
time
.
time
()
-
ntime1
ntot
+=
time
.
perf_counter
()
-
ntime1
# need to flatten images
bench1
=
refout
[:,
0
::
ss
[
0
],
0
::
ss
[
1
],
:]
.
reshape
(
...
...
@@ -81,9 +81,9 @@ class TestSP:
# swap the last two dimensions (output needs to be nkern x outshp)
bench1
=
np
.
swapaxes
(
bench1
,
1
,
2
)
ttime1
=
time
.
time
()
ttime1
=
time
.
perf_counter
()
out1
=
f
(
filters
,
biasvals
,
img1d
)
ttot
+=
time
.
time
()
-
ttime1
ttot
+=
time
.
perf_counter
()
-
ttime1
temp
=
bench1
.
flatten
()
-
out1
.
flatten
()
assert
(
temp
<
1e-5
)
.
all
()
...
...
tests/sparse/test_basic.py
浏览文件 @
671a821d
...
...
@@ -1422,11 +1422,11 @@ class TestStructuredDot:
pytensor_times
=
[]
scipy_times
=
[]
for
i
in
range
(
5
):
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
pytensor_result
=
f
(
spmat
,
mat
)
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
scipy_result
=
spmat
*
mat
t2
=
time
.
time
()
t2
=
time
.
perf_counter
()
pytensor_times
.
append
(
t1
-
t0
)
scipy_times
.
append
(
t2
-
t1
)
...
...
@@ -1467,11 +1467,11 @@ class TestStructuredDot:
]:
spmat
=
sp
.
sparse
.
csr_matrix
(
random_lil
((
M
,
N
),
sparse_dtype
,
nnz
))
mat
=
np
.
asarray
(
np
.
random
.
standard_normal
((
N
,
K
)),
dense_dtype
)
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
pytensor_result
=
f
(
spmat
,
mat
)
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
scipy_result
=
spmat
*
mat
t2
=
time
.
time
()
t2
=
time
.
perf_counter
()
pytensor_time
=
t1
-
t0
scipy_time
=
t2
-
t1
...
...
tests/tensor/nnet/speed_test_conv.py
浏览文件 @
671a821d
...
...
@@ -99,7 +99,7 @@ def exec_multilayer_conv_nnet_old(
(
nkern
,
ConvOp
.
getOutputShape
(
imshp
[
1
:],
kshp
,
ss
,
conv_mode
))
)
time1
=
time
.
time
()
time1
=
time
.
perf_counter
()
outval
=
np
.
zeros
(
np
.
r_
[
bsize
,
outshp
])
if
validate
:
# causes an atexit problem
...
...
@@ -119,7 +119,7 @@ def exec_multilayer_conv_nnet_old(
outval
[
b
,
n
,
...
]
+=
_convolve2d
(
imgval
[
b
,
i
,
...
],
w_flip
[
n
,
i
,
...
],
1
,
val
,
bval
,
0
)[
0
::
ss
[
0
],
0
::
ss
[
1
]]
ntot
+=
time
.
time
()
-
time1
ntot
+=
time
.
perf_counter
()
-
time1
# ConvOp
if
unroll_patch
and
not
unroll_patch_size
:
...
...
@@ -149,18 +149,18 @@ def exec_multilayer_conv_nnet_old(
propup2
=
function
([
inputs4
,
kerns4
],
conv_op
)
propup3
=
function
([
inputs4
,
kerns4
],
conv_op
,
mode
=
Mode
(
linker
=
"py"
))
time1
=
time
.
time
()
time1
=
time
.
perf_counter
()
for
i
in
range
(
repeat
):
hidval2_
=
propup2
(
imgval
,
w_flip
)
hidval2
=
hidval2_
# [:,:,0::ss[0],0::ss[1]]
tctot
+=
time
.
time
()
-
time1
tctot
+=
time
.
perf_counter
()
-
time1
if
conv_op_py
:
time1
=
time
.
time
()
time1
=
time
.
perf_counter
()
for
i
in
range
(
repeat
):
hidval3_
=
propup3
(
imgval
,
w_flip
)
hidval3
=
hidval3_
# [:,:,0::ss[0],0::ss[1]]
tpytot
+=
time
.
time
()
-
time1
tpytot
+=
time
.
perf_counter
()
-
time1
assert
(
np
.
abs
(
hidval2
-
hidval3
)
<
1e-5
)
.
all
()
else
:
tpytot
+=
0
...
...
@@ -223,7 +223,7 @@ def exec_multilayer_conv_nnet(
(
nkern
,
ConvOp
.
getOutputShape
(
imshp
[
1
:],
kshp
,
ss
,
conv_mode
))
)
time1
=
time
.
time
()
time1
=
time
.
perf_counter
()
# outval = np.zeros(np.r_[bsize, outshp])
# ConvOp
...
...
@@ -253,10 +253,10 @@ def exec_multilayer_conv_nnet(
# ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)))
propup2
=
function
([
inputs4
,
kerns4
],
conv_op
)
time1
=
time
.
time
()
time1
=
time
.
perf_counter
()
for
i
in
range
(
repeat
):
propup2
(
imgval
,
w_flip
)
tctot
+=
time
.
time
()
-
time1
tctot
+=
time
.
perf_counter
()
-
time1
imshp
=
tuple
(
outshp
)
# imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2])
...
...
tests/tensor/nnet/test_conv.py
浏览文件 @
671a821d
...
...
@@ -615,9 +615,9 @@ class TestConv2D(utt.InferShapeTester):
)
)
pytensor_conv
=
pytensor
.
function
([],
output
,
mode
=
mode
)
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
pytensor_conv
.
vm
(
n_calls
=
n_calls
)
t2
=
time
.
time
()
t2
=
time
.
perf_counter
()
print
(
t2
-
t1
,
end
=
" "
)
print
()
...
...
tests/tensor/nnet/test_conv3d2d.py
浏览文件 @
671a821d
...
...
@@ -127,9 +127,9 @@ def test_conv3d(border_mode):
np
.
arange
(
Nf
*
Tf
*
C
*
Hf
*
Wf
)
.
reshape
(
Nf
,
Tf
,
C
,
Hf
,
Wf
)
.
astype
(
"float32"
)
)
# t0 = time.
time
()
# t0 = time.
perf_counter
()
pyres
=
pyconv3d
(
signals
,
filters
,
border_mode
)
# print(time.
time
() - t0)
# print(time.
perf_counter
() - t0)
s_signals
=
shared
(
signals
)
s_filters
=
shared
(
filters
)
...
...
@@ -146,9 +146,9 @@ def test_conv3d(border_mode):
newconv3d
=
pytensor
.
function
([],
[],
updates
=
{
s_output
:
out
},
mode
=
mode
)
check_diagonal_subtensor_view_traces
(
newconv3d
)
# t0 = time.
time
()
# t0 = time.
perf_counter
()
newconv3d
()
# print(time.
time
() - t0)
# print(time.
perf_counter
() - t0)
utt
.
assert_allclose
(
pyres
,
s_output
.
get_value
(
borrow
=
True
))
gsignals
,
gfilters
=
pytensor
.
grad
(
out
.
sum
(),
[
s_signals
,
s_filters
])
gnewconv3d
=
pytensor
.
function
(
...
...
@@ -160,9 +160,9 @@ def test_conv3d(border_mode):
)
check_diagonal_subtensor_view_traces
(
gnewconv3d
)
# t0 = time.
time
()
# t0 = time.
perf_counter
()
gnewconv3d
()
# print("grad", time.
time
() - t0)
# print("grad", time.
perf_counter
() - t0)
Ns
,
Ts
,
C
,
Hs
,
Ws
=
3
,
3
,
3
,
5
,
5
Nf
,
Tf
,
C
,
Hf
,
Wf
=
4
,
2
,
3
,
2
,
2
...
...
@@ -189,9 +189,9 @@ def test_conv3d(border_mode):
np
.
arange
(
Nf
*
Tf
*
C
*
Hf
*
Wf
)
.
reshape
(
Nf
,
Tf
,
C
,
Hf
,
Wf
)
.
astype
(
"float32"
)
)
# t0 = time.
time
()
# t0 = time.
perf_counter
()
pyres
=
pyconv3d
(
signals
,
filters
,
border_mode
)
# print(time.
time
() - t0)
# print(time.
perf_counter
() - t0)
s_signals
=
shared
(
signals
)
s_filters
=
shared
(
filters
)
...
...
@@ -207,9 +207,9 @@ def test_conv3d(border_mode):
newconv3d
=
pytensor
.
function
([],
[],
updates
=
{
s_output
:
out
},
mode
=
mode
)
# t0 = time.
time
()
# t0 = time.
perf_counter
()
newconv3d
()
# print(time.
time
() - t0)
# print(time.
perf_counter
() - t0)
utt
.
assert_allclose
(
pyres
,
s_output
.
get_value
(
borrow
=
True
))
gsignals
,
gfilters
=
pytensor
.
grad
(
out
.
sum
(),
[
s_signals
,
s_filters
])
gnewconv3d
=
pytensor
.
function
(
...
...
@@ -220,9 +220,9 @@ def test_conv3d(border_mode):
name
=
"grad"
,
)
# t0 = time.
time
()
# t0 = time.
perf_counter
()
gnewconv3d
()
# print("grad", time.
time
() - t0)
# print("grad", time.
perf_counter
() - t0)
Ns
,
Ts
,
C
,
Hs
,
Ws
=
3
,
3
,
3
,
5
,
5
Nf
,
Tf
,
C
,
Hf
,
Wf
=
4
,
1
,
3
,
2
,
2
...
...
tests/tensor/rewriting/test_math.py
浏览文件 @
671a821d
...
...
@@ -1739,15 +1739,15 @@ class TestFusion:
f
=
function
(
list
(
sym_inputs
),
g
,
mode
=
mode
)
for
x
in
range
(
nb_repeat
):
out
=
f
(
*
val_inputs
)
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
else
:
out
=
shared_fn
(
np
.
zeros
(
shp
,
dtype
=
out_dtype
),
"out"
)
assert
out
.
dtype
==
g
.
dtype
f
=
function
(
sym_inputs
,
[],
updates
=
[(
out
,
g
)],
mode
=
mode
)
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
for
x
in
range
(
nb_repeat
):
f
(
*
val_inputs
)
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
out
=
out
.
get_value
()
times
[
id
]
=
t1
-
t0
...
...
@@ -2331,11 +2331,11 @@ def speed_local_pow_specialize_range():
f1
=
function
([
v
],
v
**
i
,
mode
=
mode
)
f2
=
function
([
v
],
v
**
i
,
mode
=
mode_without_pow_rewrite
)
assert
len
(
f1
.
maker
.
fgraph
.
toposort
())
==
1
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
f1
(
val
)
t2
=
time
.
time
()
t2
=
time
.
perf_counter
()
f2
(
val
)
t3
=
time
.
time
()
t3
=
time
.
perf_counter
()
print
(
i
,
t2
-
t1
,
t3
-
t2
,
t2
-
t1
<
t3
-
t2
)
if
not
t2
-
t1
<
t3
-
t2
:
print
(
"WARNING WE ARE SLOWER"
)
...
...
@@ -2343,11 +2343,11 @@ def speed_local_pow_specialize_range():
f1
=
function
([
v
],
v
**
i
,
mode
=
mode
)
f2
=
function
([
v
],
v
**
i
,
mode
=
mode_without_pow_rewrite
)
assert
len
(
f1
.
maker
.
fgraph
.
toposort
())
==
1
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
f1
(
val
)
t2
=
time
.
time
()
t2
=
time
.
perf_counter
()
f2
(
val
)
t3
=
time
.
time
()
t3
=
time
.
perf_counter
()
print
(
i
,
t2
-
t1
,
t3
-
t2
,
t2
-
t1
<
t3
-
t2
)
if
not
t2
-
t1
<
t3
-
t2
:
print
(
"WARNING WE ARE SLOWER"
)
...
...
@@ -3119,11 +3119,11 @@ class TestLocalErfc:
f2
=
function
([
x
],
log
(
erfc
(
x
)),
mode
=
mode
)
print
(
f1
.
maker
.
fgraph
.
toposort
())
print
(
f2
.
maker
.
fgraph
.
toposort
())
t0
=
time
.
time
()
t0
=
time
.
perf_counter
()
f1
(
val
)
t1
=
time
.
time
()
t1
=
time
.
perf_counter
()
f2
(
val
)
t2
=
time
.
time
()
t2
=
time
.
perf_counter
()
print
(
t1
-
t0
,
t2
-
t1
)
...
...
tests/tensor/test_gc.py
浏览文件 @
671a821d
...
...
@@ -114,13 +114,13 @@ def test_merge_opt_runtime():
for
i
in
range
(
50
):
r
=
r
+
r
/
10
t
=
time
.
time
()
t
=
time
.
perf_counter
()
pytensor
.
function
([
x
],
r
,
mode
=
"FAST_COMPILE"
)
# FAST_RUN does in-place optimizer which requires a lot of
# toposorting, which is actually pretty slow at the moment. This
# test was designed to test MergeOptimizer... so I'm leaving
# toposort optimizations for a later date.
dt
=
time
.
time
()
-
t
dt
=
time
.
perf_counter
()
-
t
# it should never take longer than 5 seconds to compile this graph
assert
dt
<
5.0
,
dt
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论