Unverified 提交 671a821d authored 作者: Thomas Wiecki 提交者: GitHub

Downstream 1312 (#34)

* Replace time.time with time.perf_counter Co-authored-by: redbopo <redbopo.lan@gmail.com>
上级 4deacacd
......@@ -43,7 +43,7 @@ def extended_open(filename, mode="r"):
logger = logging.getLogger("pytensor.compile.profiling")
pytensor_imported_time: float = time.time()
pytensor_imported_time: float = time.perf_counter()
total_fct_exec_time: float = 0.0
total_graph_rewrite_time: float = 0.0
total_time_linker: float = 0.0
......@@ -165,7 +165,7 @@ def print_global_stats():
print(
(
"Global stats: ",
f"Time elasped since PyTensor import = {time.time() - pytensor_imported_time:6.3f}s, "
f"Time elasped since PyTensor import = {time.perf_counter() - pytensor_imported_time:6.3f}s, "
f"Time spent in PyTensor functions = {total_fct_exec_time:6.3f}s, "
"Time spent compiling PyTensor functions: "
f"rewriting = {total_graph_rewrite_time:6.3f}s, linking = {total_time_linker:6.3f}s ",
......@@ -831,7 +831,7 @@ class ProfileStats:
f"Time in all call to pytensor.grad() {pytensor.gradient.grad_time:e}s",
file=file,
)
total_time = time.time() - pytensor_imported_time
total_time = time.perf_counter() - pytensor_imported_time
print(f"Time since pytensor import {total_time:.3f}s", file=file)
def summary_memory(self, file, N=None):
......@@ -1299,9 +1299,9 @@ class ProfileStats:
# Config: whether print min memory peak
if config.profiling__min_peak_memory:
node_list = fgraph.apply_nodes
ttt = time.time()
ttt = time.perf_counter()
min_peak = count_minimum_peak(node_list, fgraph, nodes_mem)
min_peak_time += time.time() - ttt
min_peak_time += time.perf_counter() - ttt
min_max_peak = max(min_max_peak, min_peak)
del fgraph, nodes_mem
......
......@@ -492,7 +492,7 @@ def grad(
respect to the output, then a zero variable is returned.
"""
t0 = time.time()
t0 = time.perf_counter()
if cost is None:
if known_grads is None:
......@@ -643,7 +643,7 @@ def grad(
else:
assert return_disconnected.lower() == "disconnected"
t1 = time.time()
t1 = time.perf_counter()
global grad_time
grad_time += t1 - t0
......
......@@ -473,7 +473,7 @@ class Validator(Feature):
exception. replace_all_validate will print out the
verbose output. Or it has to be done here before raise.
"""
t0 = time.time()
t0 = time.perf_counter()
try:
ret = fgraph.execute_callbacks("validate")
except Exception as e:
......@@ -494,7 +494,7 @@ class Validator(Feature):
reason = uf_info.function
print(f"validate failed on node {r}.\n Reason: {reason}, {e}")
raise
t1 = time.time()
t1 = time.perf_counter()
if fgraph.profile:
fgraph.profile.validate_time += t1 - t0
return ret
......
......@@ -717,7 +717,7 @@ class FunctionGraph(MetaObject):
a method called after name.
"""
t0 = time.time()
t0 = time.perf_counter()
for feature in self._features:
try:
fn = getattr(feature, name)
......@@ -726,10 +726,10 @@ class FunctionGraph(MetaObject):
# try; the AttributeError really must come from feature.${name}
# not existing
continue
tf0 = time.time()
tf0 = time.perf_counter()
fn(self, *args, **kwargs)
self.execute_callbacks_times[feature] += time.time() - tf0
self.execute_callbacks_time += time.time() - t0
self.execute_callbacks_times[feature] += time.perf_counter() - tf0
self.execute_callbacks_time += time.perf_counter() - t0
def collect_callbacks(self, name: str, *args) -> Dict[Feature, Any]:
"""Collects callbacks
......
......@@ -298,9 +298,9 @@ class SequentialGraphRewriter(GraphRewriter, UserList):
for rewriter in self.data:
try:
nb_nodes_before = len(fgraph.apply_nodes)
t0 = time.time()
t0 = time.perf_counter()
sub_prof = rewriter.apply(fgraph)
l.append(float(time.time() - t0))
l.append(float(time.perf_counter() - t0))
sub_profs.append(sub_prof)
nb_nodes.append((nb_nodes_before, len(fgraph.apply_nodes)))
if fgraph.profile:
......@@ -701,7 +701,7 @@ class MergeOptimizer(GraphRewriter):
def apply(self, fgraph):
sched = fgraph.merge_feature.scheduled
nb_fail = 0
t0 = time.time()
t0 = time.perf_counter()
if fgraph.profile:
validate_before = fgraph.profile.validate_time
callback_before = fgraph.execute_callbacks_time
......@@ -807,7 +807,7 @@ class MergeOptimizer(GraphRewriter):
return (
nb_fail,
time.time() - t0,
time.perf_counter() - t0,
validate_time,
callback_time,
callbacks_time,
......@@ -1066,9 +1066,9 @@ class MetaNodeRewriter(NodeRewriter):
return self.track_dict[type(node.op)]
def time_call(self, fn):
start = time.time()
start = time.perf_counter()
fn()
return time.time() - start
return time.perf_counter() - start
class FromFunctionNodeRewriter(NodeRewriter):
......@@ -1303,9 +1303,9 @@ class SequentialNodeRewriter(NodeRewriter):
new_repl = None
for rewrite in rewrites:
rewrite_start = time.time()
rewrite_start = time.perf_counter()
new_repl = rewrite.transform(fgraph, node)
rewrite_finish = time.time()
rewrite_finish = time.perf_counter()
if self.profile:
self.time_rewrites[rewrite] += rewrite_start - rewrite_finish
self.process_count[rewrite] += 1
......@@ -2026,9 +2026,9 @@ class WalkingGraphRewriter(NodeProcessingGraphRewriter):
start_from = fgraph.outputs
callback_before = fgraph.execute_callbacks_time
nb_nodes_start = len(fgraph.apply_nodes)
t0 = time.time()
t0 = time.perf_counter()
q = deque(io_toposort(fgraph.inputs, start_from))
io_t = time.time() - t0
io_t = time.perf_counter() - t0
def importer(node):
if node is not current_node:
......@@ -2039,7 +2039,7 @@ class WalkingGraphRewriter(NodeProcessingGraphRewriter):
)
nb = 0
try:
t0 = time.time()
t0 = time.perf_counter()
while q:
if self.order == "out_to_in":
node = q.pop()
......@@ -2049,7 +2049,7 @@ class WalkingGraphRewriter(NodeProcessingGraphRewriter):
continue
current_node = node
nb += self.process_node(fgraph, node)
loop_t = time.time() - t0
loop_t = time.perf_counter() - t0
finally:
self.detach_updater(fgraph, u)
......@@ -2367,9 +2367,9 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
for crewriter in self.cleanup_rewriters:
change_tracker.reset()
nb = change_tracker.nb_imported
t_rewrite = time.time()
t_rewrite = time.perf_counter()
sub_prof = crewriter.apply(fgraph)
time_rewriters[crewriter] += time.time() - t_rewrite
time_rewriters[crewriter] += time.perf_counter() - t_rewrite
profs_dict[crewriter].append(sub_prof)
if change_tracker.changed:
process_count.setdefault(crewriter, 0)
......@@ -2381,7 +2381,7 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
while changed and not max_use_abort:
process_count = {}
t0 = time.time()
t0 = time.perf_counter()
changed = False
iter_cleanup_sub_profs = {}
for crewrite in self.cleanup_rewriters:
......@@ -2392,9 +2392,9 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
for grewrite in self.global_rewriters:
change_tracker.reset()
nb = change_tracker.nb_imported
t_rewrite = time.time()
t_rewrite = time.perf_counter()
sub_prof = grewrite.apply(fgraph)
time_rewriters[grewrite] += time.time() - t_rewrite
time_rewriters[grewrite] += time.perf_counter() - t_rewrite
sub_profs.append(sub_prof)
if change_tracker.changed:
process_count.setdefault(grewrite, 0)
......@@ -2409,13 +2409,13 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
)
global_sub_profs.append(sub_profs)
global_rewriter_timing.append(float(time.time() - t0))
global_rewriter_timing.append(float(time.perf_counter() - t0))
changed |= apply_cleanup(iter_cleanup_sub_profs)
topo_t0 = time.time()
topo_t0 = time.perf_counter()
q = deque(io_toposort(fgraph.inputs, start_from))
io_toposort_timing.append(time.time() - topo_t0)
io_toposort_timing.append(time.perf_counter() - topo_t0)
nb_nodes.append(len(q))
max_nb_nodes = max(max_nb_nodes, len(q))
......@@ -2443,11 +2443,11 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
current_node = node
for node_rewriter in self.node_tracker.get_trackers(node.op):
nb = change_tracker.nb_imported
t_rewrite = time.time()
t_rewrite = time.perf_counter()
node_rewriter_change = self.process_node(
fgraph, node, node_rewriter
)
time_rewriters[node_rewriter] += time.time() - t_rewrite
time_rewriters[node_rewriter] += time.perf_counter() - t_rewrite
if not node_rewriter_change:
continue
process_count.setdefault(node_rewriter, 0)
......@@ -2469,13 +2469,13 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
# Apply final rewriters
sub_profs = []
t_before_final_rewrites = time.time()
t_before_final_rewrites = time.perf_counter()
for grewrite in self.final_rewriters:
change_tracker.reset()
nb = change_tracker.nb_imported
t_rewrite = time.time()
t_rewrite = time.perf_counter()
sub_prof = grewrite.apply(fgraph)
time_rewriters[grewrite] += time.time() - t_rewrite
time_rewriters[grewrite] += time.perf_counter() - t_rewrite
sub_profs.append(sub_prof)
if change_tracker.changed:
process_count.setdefault(grewrite, 0)
......@@ -2490,7 +2490,7 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
)
final_sub_profs.append(sub_profs)
global_rewriter_timing[-1] += time.time() - t_before_final_rewrites
global_rewriter_timing[-1] += time.perf_counter() - t_before_final_rewrites
changed |= apply_cleanup(iter_cleanup_sub_profs)
......@@ -2504,7 +2504,7 @@ class EquilibriumGraphRewriter(NodeProcessingGraphRewriter):
cleanup_sub_profs.append(c_sub_profs)
loop_process_count.append(process_count)
loop_timing.append(float(time.time() - t0))
loop_timing.append(float(time.perf_counter() - t0))
end_nb_nodes = len(fgraph.apply_nodes)
......
......@@ -326,11 +326,11 @@ def dlimport(fullpath, suffix=None):
global import_time
try:
importlib.invalidate_caches()
t0 = time.time()
t0 = time.perf_counter()
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="numpy.ndarray size changed")
rval = __import__(module_name, {}, {}, [module_name])
t1 = time.time()
t1 = time.perf_counter()
import_time += t1 - t0
if not rval:
raise Exception("__import__ failed", fullpath)
......@@ -771,7 +771,7 @@ class ModuleCache:
"""
if age_thresh_use is None:
age_thresh_use = self.age_thresh_use
start_time = time.time()
start_time = time.perf_counter()
too_old_to_use = []
to_delete = []
......@@ -786,7 +786,7 @@ class ModuleCache:
to_delete_empty.append((args, kwargs))
# add entries that are not in the entry_from_key dictionary
time_now = time.time()
time_now = time.perf_counter()
# Go through directories in alphabetical order to ensure consistent
# behavior.
try:
......@@ -956,7 +956,7 @@ class ModuleCache:
# directories in alphabetical order so as to make
# sure all new processes only use the first one.
if cleanup:
age = time.time() - last_access_time(entry)
age = time.perf_counter() - last_access_time(entry)
if delete_if_problem or age > self.age_thresh_del:
rmtree(
root,
......@@ -1063,7 +1063,9 @@ class ModuleCache:
if not files:
_rmtree(*a, **kw)
_logger.debug(f"Time needed to refresh cache: {time.time() - start_time}")
_logger.debug(
f"Time needed to refresh cache: {time.perf_counter() - start_time}"
)
return too_old_to_use
......@@ -1269,7 +1271,7 @@ class ModuleCache:
Its associated pickled file containing a KeyData.
"""
start_time = time.time()
start_time = time.perf_counter()
# Verify that when we reload the KeyData from the pickled file, the
# same key can be found in it, and is not equal to more than one
# other key.
......@@ -1317,7 +1319,7 @@ class ModuleCache:
f"The keys are:\n {other}\nand\n {key}\n(found in {key_pkl})."
)
self.time_spent_in_check_key += time.time() - start_time
self.time_spent_in_check_key += time.perf_counter() - start_time
# default 31 days
age_thresh_del = config.cmodule__age_thresh_use + 60 * 60 * 24 * 7
......@@ -1506,7 +1508,7 @@ class ModuleCache:
assert key[0]
to_del = []
time_now = time.time()
time_now = time.perf_counter()
for filename in os.listdir(self.dirname):
if filename.startswith("tmp"):
try:
......
......@@ -394,9 +394,9 @@ class Loop(UpdatingVM):
for thunk, node, old_storage in zip_longest(
self.thunks, self.nodes, self.post_thunk_clear, fillvalue=()
):
t0 = time.time()
t0 = time.perf_counter()
thunk()
t1 = time.time()
t1 = time.perf_counter()
self.call_counts[i] += 1
self.call_times[i] += t1 - t0
for old_s in old_storage:
......@@ -515,15 +515,15 @@ class Stack(UpdatingVM):
"""
idx = self.node_idx[node]
t0 = time.time()
t0 = time.perf_counter()
rval = self.thunks[idx]()
self.node_executed_order.append(node)
# Some thunks on some computers run faster than the granularity
# of the time.time clock.
# of the time.perf_counter clock.
# Profile output looks buggy if a node has run but takes 0 time.
# (and profile code might hide real bugs if it rounds up 0)
dt = max(time.time() - t0, 1e-10)
dt = max(time.perf_counter() - t0, 1e-10)
if self.callback is not None:
self.callback(
node=node,
......@@ -1231,21 +1231,21 @@ class VMLinker(LocalLinker):
thunks = []
t0 = time.time()
t0 = time.perf_counter()
linker_make_thunk_time = {}
impl = None
if self.c_thunks is False:
impl = "py"
for node in order:
try:
thunk_start = time.time()
thunk_start = time.perf_counter()
# no-recycling is done at each VM.__call__ So there is
# no need to cause duplicate c code by passing
# no_recycling here.
thunks.append(
node.op.make_thunk(node, storage_map, compute_map, [], impl=impl)
)
linker_make_thunk_time[node] = time.time() - thunk_start
linker_make_thunk_time[node] = time.perf_counter() - thunk_start
if not hasattr(thunks[-1], "lazy"):
# We don't want all ops maker to think about lazy Ops.
# So if they didn't specify that its lazy or not, it isn't.
......@@ -1254,7 +1254,7 @@ class VMLinker(LocalLinker):
except Exception:
raise_with_op(fgraph, node)
t1 = time.time()
t1 = time.perf_counter()
if self.profile:
self.profile.linker_node_make_thunks += t1 - t0
......
......@@ -82,12 +82,12 @@ def execute(execute=True, verbose=True, M=2000, N=2000, K=2000, iters=10, order=
if sync:
# Make sure we don't include the time from the first call
c.get_value(borrow=True, return_internal_type=True).sync()
t0 = time.time()
t0 = time.perf_counter()
for i in range(iters):
f()
if sync:
c.get_value(borrow=True, return_internal_type=True).sync()
t1 = time.time()
t1 = time.perf_counter()
return t1 - t0, impl
......
......@@ -34,9 +34,9 @@ parser.add_option(
def evalTime(f, v, script=False, loops=1000):
min = 1e10
for i in range(0, loops):
t0 = time.time()
t0 = time.perf_counter()
f(v)
dt = time.time() - t0
dt = time.perf_counter() - t0
min = dt if dt < min else min
if not script:
print(f" run time in {int(loops)} loops was {min:2.9f} sec")
......
......@@ -13,12 +13,12 @@ print(f1.maker.fgraph.toposort())
print(f2.maker.fgraph.toposort())
for i in (1, 10, 100, 1000, 10000, 100000, 1000000, 10000000):
o = np.zeros(i, dtype="float32")
t0 = time.time()
t0 = time.perf_counter()
f1(o)
t1 = time.time()
t1 = time.perf_counter()
tf1 = t1 - t0
t0 = time.time()
t0 = time.perf_counter()
f2()
t1 = time.time()
t1 = time.perf_counter()
print("%8i %6.1f ns %7.1f ns" % (i, tf1 * 1e6, (t1 - t0) * 1e6))
......@@ -1721,7 +1721,7 @@ class Scan(Op, ScanMethodsMixin, HasInnerGraph):
"""
info = self.info
# 1. Unzip the number of steps and sequences.
t0_call = time.time()
t0_call = time.perf_counter()
t_fn = 0
n_steps = inputs[0]
seqs = []
......@@ -1942,7 +1942,7 @@ class Scan(Op, ScanMethodsMixin, HasInnerGraph):
old_mitmot_input_data[idx] = var.data
# 5.1 compute outputs
t0_fn = time.time()
t0_fn = time.perf_counter()
try:
vm()
......@@ -1970,7 +1970,7 @@ class Scan(Op, ScanMethodsMixin, HasInnerGraph):
# old-style linkers raise their own exceptions
raise
dt_fn = time.time() - t0_fn
dt_fn = time.perf_counter() - t0_fn
if info.as_while:
pdx = offset + info.n_shared_outs
cond = inner_output_storage[pdx].storage[0] == 0
......@@ -2196,7 +2196,7 @@ class Scan(Op, ScanMethodsMixin, HasInnerGraph):
for o_s in inner_output_storage:
o_s.storage[0] = None
t_call = time.time() - t0_call
t_call = time.perf_counter() - t0_call
# NOTE: make this match what's in function.types.Function
# and this little string helps us to find this spot:
# "PROFILE_CODE"
......
......@@ -530,7 +530,7 @@ class GemmRelated(COp):
#ifndef MOD
#define MOD %
#endif
static double time_time() // a time function like time.time()
static double time_time() // a time function like time.perf_counter()
{
struct timeval tv;
gettimeofday(&tv, 0);
......@@ -1488,15 +1488,15 @@ def _gemm_from_node2(fgraph, node):
"""
lst = []
t0 = time.time()
t0 = time.perf_counter()
_gemm_canonicalize(fgraph, node.outputs[0], 1.0, lst, 0)
t1 = time.time()
t1 = time.perf_counter()
if len(lst) > 1:
lst = _factor_canonicalized(lst)
t2 = time.time()
t2 = time.perf_counter()
rval = _gemm_from_factored_list(fgraph, lst)
t3 = time.time()
t3 = time.perf_counter()
# It can happen that _factor_canonicalized and
# _gemm_from_factored_list return a node with an incorrect
......@@ -1549,9 +1549,9 @@ class GemmOptimizer(GraphRewriter):
fgraph.attach_feature(u)
while did_something:
nb_iter += 1
t0 = time.time()
t0 = time.perf_counter()
nodelist = pytensor.graph.basic.io_toposort(fgraph.inputs, fgraph.outputs)
time_toposort += time.time() - t0
time_toposort += time.perf_counter() - t0
did_something = False
nodelist.reverse()
for node in nodelist:
......
......@@ -837,9 +837,9 @@ class FusionOptimizer(GraphRewriter):
callbacks_before = fgraph.execute_callbacks_times.copy()
callback_before = fgraph.execute_callbacks_time
while did_something:
t0 = time.time()
t0 = time.perf_counter()
nodelist = list(fgraph.toposort())
time_toposort += time.time() - t0
time_toposort += time.perf_counter() - t0
nodelist.reverse()
did_something = False
for node in nodelist:
......
......@@ -113,12 +113,12 @@ def test_speed():
x = np.asarray([2.0, 3.0], dtype=config.floatX)
numpy_version(x, steps_a)
t0 = time.time()
t0 = time.perf_counter()
# print numpy_version(x, steps_a)
t1 = time.time()
t2 = time.time()
t1 = time.perf_counter()
t2 = time.perf_counter()
# print numpy_version(x, steps_b)
t3 = time.time()
t3 = time.perf_counter()
t_a = t1 - t0
t_b = t3 - t2
......@@ -135,15 +135,15 @@ def test_speed():
f_b = function([x], b, mode=Mode(optimizer=None, linker=linker()))
f_a([2.0, 3.0])
t0 = time.time()
t0 = time.perf_counter()
f_a([2.0, 3.0])
t1 = time.time()
t1 = time.perf_counter()
f_b([2.0, 3.0])
t2 = time.time()
t2 = time.perf_counter()
f_b([2.0, 3.0])
t3 = time.time()
t3 = time.perf_counter()
t_a = t1 - t0
t_b = t3 - t2
......@@ -185,15 +185,15 @@ def test_speed_lazy(linker):
f_b = function([x], b, mode=Mode(optimizer=None, linker=linker))
f_a([2.0])
t0 = time.time()
t0 = time.perf_counter()
f_a([2.0])
t1 = time.time()
t1 = time.perf_counter()
f_b([2.0])
t2 = time.time()
t2 = time.perf_counter()
f_b([2.0])
t3 = time.time()
t3 = time.perf_counter()
t_a = t1 - t0
t_b = t3 - t2
......
......@@ -199,10 +199,10 @@ def check_basics(
avg_var = 0.0
for i in range(steps):
t0 = time.time()
t0 = time.perf_counter()
ival = f(*inputs)
assert ival.shape == sample_size
dt += time.time() - t0
dt += time.perf_counter() - t0
ival = np.asarray(ival)
if i == 0:
mean = np.array(ival, copy=True)
......@@ -733,11 +733,11 @@ def basic_multinomialtest(
avg_pvals = np.zeros(target_pvals.shape, dtype=config.floatX)
for i in range(steps):
t0 = time.time()
t0 = time.perf_counter()
ival = f()
assert ival.shape == sample_size
assert np.all(np.sum(ival, axis=1) == n_samples)
dt += time.time() - t0
dt += time.perf_counter() - t0
avg_pvals += ival
avg_pvals /= steps * n_samples
......
......@@ -64,14 +64,14 @@ class TestSP:
fulloutshp = np.array(imshp) - np.array(kshp) + 1
else:
fulloutshp = np.array(imshp) + np.array(kshp) - 1
ntime1 = time.time()
ntime1 = time.perf_counter()
refout = np.zeros((bsize,) + tuple(fulloutshp) + (nkern,))
for b in range(bsize):
for n in range(nkern):
refout[b, ..., n] = convolve2d(
img2d[b, :, :], filtersflipped[n, ...], conv_mode
)
ntot += time.time() - ntime1
ntot += time.perf_counter() - ntime1
# need to flatten images
bench1 = refout[:, 0 :: ss[0], 0 :: ss[1], :].reshape(
......@@ -81,9 +81,9 @@ class TestSP:
# swap the last two dimensions (output needs to be nkern x outshp)
bench1 = np.swapaxes(bench1, 1, 2)
ttime1 = time.time()
ttime1 = time.perf_counter()
out1 = f(filters, biasvals, img1d)
ttot += time.time() - ttime1
ttot += time.perf_counter() - ttime1
temp = bench1.flatten() - out1.flatten()
assert (temp < 1e-5).all()
......
......@@ -1422,11 +1422,11 @@ class TestStructuredDot:
pytensor_times = []
scipy_times = []
for i in range(5):
t0 = time.time()
t0 = time.perf_counter()
pytensor_result = f(spmat, mat)
t1 = time.time()
t1 = time.perf_counter()
scipy_result = spmat * mat
t2 = time.time()
t2 = time.perf_counter()
pytensor_times.append(t1 - t0)
scipy_times.append(t2 - t1)
......@@ -1467,11 +1467,11 @@ class TestStructuredDot:
]:
spmat = sp.sparse.csr_matrix(random_lil((M, N), sparse_dtype, nnz))
mat = np.asarray(np.random.standard_normal((N, K)), dense_dtype)
t0 = time.time()
t0 = time.perf_counter()
pytensor_result = f(spmat, mat)
t1 = time.time()
t1 = time.perf_counter()
scipy_result = spmat * mat
t2 = time.time()
t2 = time.perf_counter()
pytensor_time = t1 - t0
scipy_time = t2 - t1
......
......@@ -99,7 +99,7 @@ def exec_multilayer_conv_nnet_old(
(nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))
)
time1 = time.time()
time1 = time.perf_counter()
outval = np.zeros(np.r_[bsize, outshp])
if validate:
# causes an atexit problem
......@@ -119,7 +119,7 @@ def exec_multilayer_conv_nnet_old(
outval[b, n, ...] += _convolve2d(
imgval[b, i, ...], w_flip[n, i, ...], 1, val, bval, 0
)[0 :: ss[0], 0 :: ss[1]]
ntot += time.time() - time1
ntot += time.perf_counter() - time1
# ConvOp
if unroll_patch and not unroll_patch_size:
......@@ -149,18 +149,18 @@ def exec_multilayer_conv_nnet_old(
propup2 = function([inputs4, kerns4], conv_op)
propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py"))
time1 = time.time()
time1 = time.perf_counter()
for i in range(repeat):
hidval2_ = propup2(imgval, w_flip)
hidval2 = hidval2_ # [:,:,0::ss[0],0::ss[1]]
tctot += time.time() - time1
tctot += time.perf_counter() - time1
if conv_op_py:
time1 = time.time()
time1 = time.perf_counter()
for i in range(repeat):
hidval3_ = propup3(imgval, w_flip)
hidval3 = hidval3_ # [:,:,0::ss[0],0::ss[1]]
tpytot += time.time() - time1
tpytot += time.perf_counter() - time1
assert (np.abs(hidval2 - hidval3) < 1e-5).all()
else:
tpytot += 0
......@@ -223,7 +223,7 @@ def exec_multilayer_conv_nnet(
(nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))
)
time1 = time.time()
time1 = time.perf_counter()
# outval = np.zeros(np.r_[bsize, outshp])
# ConvOp
......@@ -253,10 +253,10 @@ def exec_multilayer_conv_nnet(
# ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode)))
propup2 = function([inputs4, kerns4], conv_op)
time1 = time.time()
time1 = time.perf_counter()
for i in range(repeat):
propup2(imgval, w_flip)
tctot += time.time() - time1
tctot += time.perf_counter() - time1
imshp = tuple(outshp)
# imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2])
......
......@@ -615,9 +615,9 @@ class TestConv2D(utt.InferShapeTester):
)
)
pytensor_conv = pytensor.function([], output, mode=mode)
t1 = time.time()
t1 = time.perf_counter()
pytensor_conv.vm(n_calls=n_calls)
t2 = time.time()
t2 = time.perf_counter()
print(t2 - t1, end=" ")
print()
......
......@@ -127,9 +127,9 @@ def test_conv3d(border_mode):
np.arange(Nf * Tf * C * Hf * Wf).reshape(Nf, Tf, C, Hf, Wf).astype("float32")
)
# t0 = time.time()
# t0 = time.perf_counter()
pyres = pyconv3d(signals, filters, border_mode)
# print(time.time() - t0)
# print(time.perf_counter() - t0)
s_signals = shared(signals)
s_filters = shared(filters)
......@@ -146,9 +146,9 @@ def test_conv3d(border_mode):
newconv3d = pytensor.function([], [], updates={s_output: out}, mode=mode)
check_diagonal_subtensor_view_traces(newconv3d)
# t0 = time.time()
# t0 = time.perf_counter()
newconv3d()
# print(time.time() - t0)
# print(time.perf_counter() - t0)
utt.assert_allclose(pyres, s_output.get_value(borrow=True))
gsignals, gfilters = pytensor.grad(out.sum(), [s_signals, s_filters])
gnewconv3d = pytensor.function(
......@@ -160,9 +160,9 @@ def test_conv3d(border_mode):
)
check_diagonal_subtensor_view_traces(gnewconv3d)
# t0 = time.time()
# t0 = time.perf_counter()
gnewconv3d()
# print("grad", time.time() - t0)
# print("grad", time.perf_counter() - t0)
Ns, Ts, C, Hs, Ws = 3, 3, 3, 5, 5
Nf, Tf, C, Hf, Wf = 4, 2, 3, 2, 2
......@@ -189,9 +189,9 @@ def test_conv3d(border_mode):
np.arange(Nf * Tf * C * Hf * Wf).reshape(Nf, Tf, C, Hf, Wf).astype("float32")
)
# t0 = time.time()
# t0 = time.perf_counter()
pyres = pyconv3d(signals, filters, border_mode)
# print(time.time() - t0)
# print(time.perf_counter() - t0)
s_signals = shared(signals)
s_filters = shared(filters)
......@@ -207,9 +207,9 @@ def test_conv3d(border_mode):
newconv3d = pytensor.function([], [], updates={s_output: out}, mode=mode)
# t0 = time.time()
# t0 = time.perf_counter()
newconv3d()
# print(time.time() - t0)
# print(time.perf_counter() - t0)
utt.assert_allclose(pyres, s_output.get_value(borrow=True))
gsignals, gfilters = pytensor.grad(out.sum(), [s_signals, s_filters])
gnewconv3d = pytensor.function(
......@@ -220,9 +220,9 @@ def test_conv3d(border_mode):
name="grad",
)
# t0 = time.time()
# t0 = time.perf_counter()
gnewconv3d()
# print("grad", time.time() - t0)
# print("grad", time.perf_counter() - t0)
Ns, Ts, C, Hs, Ws = 3, 3, 3, 5, 5
Nf, Tf, C, Hf, Wf = 4, 1, 3, 2, 2
......
......@@ -1739,15 +1739,15 @@ class TestFusion:
f = function(list(sym_inputs), g, mode=mode)
for x in range(nb_repeat):
out = f(*val_inputs)
t1 = time.time()
t1 = time.perf_counter()
else:
out = shared_fn(np.zeros(shp, dtype=out_dtype), "out")
assert out.dtype == g.dtype
f = function(sym_inputs, [], updates=[(out, g)], mode=mode)
t0 = time.time()
t0 = time.perf_counter()
for x in range(nb_repeat):
f(*val_inputs)
t1 = time.time()
t1 = time.perf_counter()
out = out.get_value()
times[id] = t1 - t0
......@@ -2331,11 +2331,11 @@ def speed_local_pow_specialize_range():
f1 = function([v], v**i, mode=mode)
f2 = function([v], v**i, mode=mode_without_pow_rewrite)
assert len(f1.maker.fgraph.toposort()) == 1
t1 = time.time()
t1 = time.perf_counter()
f1(val)
t2 = time.time()
t2 = time.perf_counter()
f2(val)
t3 = time.time()
t3 = time.perf_counter()
print(i, t2 - t1, t3 - t2, t2 - t1 < t3 - t2)
if not t2 - t1 < t3 - t2:
print("WARNING WE ARE SLOWER")
......@@ -2343,11 +2343,11 @@ def speed_local_pow_specialize_range():
f1 = function([v], v**i, mode=mode)
f2 = function([v], v**i, mode=mode_without_pow_rewrite)
assert len(f1.maker.fgraph.toposort()) == 1
t1 = time.time()
t1 = time.perf_counter()
f1(val)
t2 = time.time()
t2 = time.perf_counter()
f2(val)
t3 = time.time()
t3 = time.perf_counter()
print(i, t2 - t1, t3 - t2, t2 - t1 < t3 - t2)
if not t2 - t1 < t3 - t2:
print("WARNING WE ARE SLOWER")
......@@ -3119,11 +3119,11 @@ class TestLocalErfc:
f2 = function([x], log(erfc(x)), mode=mode)
print(f1.maker.fgraph.toposort())
print(f2.maker.fgraph.toposort())
t0 = time.time()
t0 = time.perf_counter()
f1(val)
t1 = time.time()
t1 = time.perf_counter()
f2(val)
t2 = time.time()
t2 = time.perf_counter()
print(t1 - t0, t2 - t1)
......
......@@ -114,13 +114,13 @@ def test_merge_opt_runtime():
for i in range(50):
r = r + r / 10
t = time.time()
t = time.perf_counter()
pytensor.function([x], r, mode="FAST_COMPILE")
# FAST_RUN does in-place optimizer which requires a lot of
# toposorting, which is actually pretty slow at the moment. This
# test was designed to test MergeOptimizer... so I'm leaving
# toposort optimizations for a later date.
dt = time.time() - t
dt = time.perf_counter() - t
# it should never take longer than 5 seconds to compile this graph
assert dt < 5.0, dt
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论